path: root/contrib/llvm/lib/CodeGen
Diffstat (limited to 'contrib/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp  998
-rw-r--r--  contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h  179
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.cpp  54
-rw-r--r--  contrib/llvm/lib/CodeGen/AllocationOrder.h  89
-rw-r--r--  contrib/llvm/lib/CodeGen/Analysis.cpp  731
-rw-r--r--  contrib/llvm/lib/CodeGen/AntiDepBreaker.h  67
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp  139
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp  45
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h  52
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp  2620
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp  290
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h  74
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp  578
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h  110
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp  2076
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h  310
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp  600
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp  514
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h  159
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp  246
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h  59
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp  230
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h  109
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h  184
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp  46
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h  193
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp  289
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h  256
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp  179
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp  833
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h  265
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp  1970
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h  571
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h  95
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp  285
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h  152
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp  180
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h  128
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp  74
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h  49
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp  1541
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h  391
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp  689
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h  137
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp  123
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp  182
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp  1273
-rw-r--r--  contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h  106
-rw-r--r--  contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp  1640
-rw-r--r--  contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp  38
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.cpp  1919
-rw-r--r--  contrib/llvm/lib/CodeGen/BranchFolding.h  160
-rw-r--r--  contrib/llvm/lib/CodeGen/BuiltinGCs.cpp  139
-rw-r--r--  contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp  236
-rw-r--r--  contrib/llvm/lib/CodeGen/CallingConvLower.cpp  287
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGen.cpp  94
-rw-r--r--  contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp  5656
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp  681
-rw-r--r--  contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h  106
-rw-r--r--  contrib/llvm/lib/CodeGen/DFAPacketizer.cpp  325
-rw-r--r--  contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp  181
-rw-r--r--  contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp  602
-rw-r--r--  contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp  264
-rw-r--r--  contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp  817
-rw-r--r--  contrib/llvm/lib/CodeGen/EdgeBundles.cpp  97
-rw-r--r--  contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp  811
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp  74
-rw-r--r--  contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp  223
-rw-r--r--  contrib/llvm/lib/CodeGen/FaultMaps.cpp  150
-rw-r--r--  contrib/llvm/lib/CodeGen/FuncletLayout.cpp  59
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadata.cpp  177
-rw-r--r--  contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp  19
-rw-r--r--  contrib/llvm/lib/CodeGen/GCRootLowering.cpp  355
-rw-r--r--  contrib/llvm/lib/CodeGen/GCStrategy.cpp  22
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp  30
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  164
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp  104
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp  897
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp  107
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp  663
-rw-r--r--  contrib/llvm/lib/CodeGen/GlobalMerge.cpp  605
-rw-r--r--  contrib/llvm/lib/CodeGen/IfConversion.cpp  1844
-rw-r--r--  contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp  576
-rw-r--r--  contrib/llvm/lib/CodeGen/InlineSpiller.cpp  1456
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.cpp  250
-rw-r--r--  contrib/llvm/lib/CodeGen/InterferenceCache.h  238
-rw-r--r--  contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp  386
-rw-r--r--  contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp  608
-rw-r--r--  contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp  311
-rw-r--r--  contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp  140
-rw-r--r--  contrib/llvm/lib/CodeGen/LexicalScopes.cpp  332
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugValues.cpp  516
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp  1047
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveDebugVariables.h  75
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveInterval.cpp  1250
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp  1575
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp  205
-rw-r--r--  contrib/llvm/lib/CodeGen/LivePhysRegs.cpp  189
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp  484
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeCalc.h  248
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp  449
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRangeUtils.h  62
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp  197
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp  88
-rw-r--r--  contrib/llvm/lib/CodeGen/LiveVariables.cpp  810
-rw-r--r--  contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp  420
-rw-r--r--  contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp  162
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp  612
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MILexer.h  196
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp  2094
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIParser.h  99
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp  789
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.cpp  1005
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrinter.h  33
-rw-r--r--  contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp  71
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp  1289
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp  200
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp  1796
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp  93
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCSE.cpp  716
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCombiner.cpp  477
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp  371
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp  54
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineDominators.cpp  157
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunction.cpp  1002
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp  60
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp  89
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp  69
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstr.cpp  2236
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp  344
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLICM.cpp  1388
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp  84
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp  463
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp  44
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp  55
-rw-r--r--  contrib/llvm/lib/CodeGen/MachinePostDominators.cpp  55
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp  140
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp  529
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp  355
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineScheduler.cpp  3553
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineSink.cpp  857
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp  1328
-rw-r--r--  contrib/llvm/lib/CodeGen/MachineVerifier.cpp  2049
-rw-r--r--  contrib/llvm/lib/CodeGen/OptimizePHIs.cpp  196
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIElimination.cpp  652
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp  59
-rw-r--r--  contrib/llvm/lib/CodeGen/PHIEliminationUtils.h  25
-rw-r--r--  contrib/llvm/lib/CodeGen/ParallelCG.cpp  99
-rw-r--r--  contrib/llvm/lib/CodeGen/PatchableFunction.cpp  88
-rw-r--r--  contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp  1947
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp  98
-rw-r--r--  contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp  705
-rw-r--r--  contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp  94
-rw-r--r--  contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp  168
-rw-r--r--  contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp  1234
-rw-r--r--  contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp  139
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.cpp  161
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBase.h  122
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocBasic.cpp  298
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocFast.cpp  1122
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp  2619
-rw-r--r--  contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp  893
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp  142
-rw-r--r--  contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp  131
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp  181
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp  3058
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterCoalescer.h  116
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterPressure.cpp  1350
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterScavenging.cpp  458
-rw-r--r--  contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp  93
-rw-r--r--  contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp  388
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStack.cpp  851
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.cpp  289
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackColoring.h  149
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.cpp  138
-rw-r--r--  contrib/llvm/lib/CodeGen/SafeStackLayout.h  68
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAG.cpp  641
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp  1711
-rw-r--r--  contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp  100
-rw-r--r--  contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp  240
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  15009
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp  2243
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp  615
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp  1050
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h  145
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  4422
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp  2139
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  3391
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp  1177
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h  859
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  555
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp  1068
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  3846
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp  639
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h  127
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp  805
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  3020
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp  913
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h  180
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp  279
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  7306
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  9127
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h  1019
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp  719
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp  3591
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp  307
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp  17
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp  958
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h  118
-rw-r--r--  contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp  3550
-rw-r--r--  contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp  468
-rw-r--r--  contrib/llvm/lib/CodeGen/ShrinkWrap.cpp  556
-rw-r--r--  contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp  480
-rw-r--r--  contrib/llvm/lib/CodeGen/SlotIndexes.cpp  245
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.cpp  375
-rw-r--r--  contrib/llvm/lib/CodeGen/SpillPlacement.h  170
-rw-r--r--  contrib/llvm/lib/CodeGen/Spiller.h  42
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.cpp  1525
-rw-r--r--  contrib/llvm/lib/CodeGen/SplitKit.h  503
-rw-r--r--  contrib/llvm/lib/CodeGen/StackColoring.cpp  1126
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp  172
-rw-r--r--  contrib/llvm/lib/CodeGen/StackMaps.cpp  552
-rw-r--r--  contrib/llvm/lib/CodeGen/StackProtector.cpp  471
-rw-r--r--  contrib/llvm/lib/CodeGen/StackSlotColoring.cpp  469
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplication.cpp  65
-rw-r--r--  contrib/llvm/lib/CodeGen/TailDuplicator.cpp  932
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp  104
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp  1195
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp  1842
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp  1098
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp  49
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetPassConfig.cpp  888
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp  399
-rw-r--r--  contrib/llvm/lib/CodeGen/TargetSchedule.cpp  300
-rw-r--r--  contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp  1806
-rw-r--r--  contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp  220
-rw-r--r--  contrib/llvm/lib/CodeGen/VirtRegMap.cpp  467
-rw-r--r--  contrib/llvm/lib/CodeGen/WinEHPrepare.cpp  1235
-rw-r--r--  contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp  96
239 files changed, 184759 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
new file mode 100644
index 000000000000..a736884be672
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -0,0 +1,998 @@
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AggressiveAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("agg-antidep-debugdiv",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("agg-antidep-debugmod",
+ cl::desc("Debug control for aggressive anti-dep breaker"),
+ cl::init(0), cl::Hidden);
+
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
+ GroupNodeIndices[i] = i;
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+}
+
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
+ unsigned Node = GroupNodeIndices[Reg];
+ while (GroupNodes[Node] != Node)
+ Node = GroupNodes[Node];
+
+ return Node;
+}
+
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
+{
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
+ Regs.push_back(Reg);
+ }
+}
+
+unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
+{
+ assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
+ assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
+
+ // find group for each register
+ unsigned Group1 = GetGroup(Reg1);
+ unsigned Group2 = GetGroup(Reg2);
+
+ // if either group is 0, then that must become the parent
+ unsigned Parent = (Group1 == 0) ? Group1 : Group2;
+ unsigned Other = (Parent == Group1) ? Group2 : Group1;
+ GroupNodes.at(Other) = Parent;
+ return Parent;
+}
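// Editorial aside, not part of this patch: GetGroup, UnionGroups and
// LeaveGroup (below) form a small union-find over GroupNodes. GroupNodeIndices
// maps a register to a node; each node either points to itself (it is the
// group representative) or to another node in the same group. A minimal
// standalone sketch of the same idea, with hypothetical names and no LLVM
// types:
#include <vector>

struct RegGroups {
  std::vector<unsigned> Nodes;     // Nodes[N] == N  =>  N is a group root.
  std::vector<unsigned> NodeOfReg; // Register -> node index.

  explicit RegGroups(unsigned NumRegs) : Nodes(NumRegs), NodeOfReg(NumRegs) {
    for (unsigned R = 0; R < NumRegs; ++R) { Nodes[R] = R; NodeOfReg[R] = R; }
  }
  // GetGroup: chase parent links (no path compression).
  unsigned find(unsigned Reg) {
    unsigned N = NodeOfReg[Reg];
    while (Nodes[N] != N) N = Nodes[N];
    return N;
  }
  // UnionGroups: group 0 always becomes the parent, so register 0's
  // "unrenameable" group absorbs anything unioned with it.
  unsigned unite(unsigned Reg1, unsigned Reg2) {
    unsigned G1 = find(Reg1), G2 = find(Reg2);
    unsigned Parent = (G1 == 0) ? G1 : G2;
    Nodes[(Parent == G1) ? G2 : G1] = Parent;
    return Parent;
  }
  // LeaveGroup: give Reg a brand-new singleton group.
  unsigned leave(unsigned Reg) {
    unsigned Idx = Nodes.size();
    Nodes.push_back(Idx);
    NodeOfReg[Reg] = Idx;
    return Idx;
  }
};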
+
+unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
+{
+ // Create a new GroupNode for Reg. Reg's existing GroupNode must
+ // stay as is because there could be other GroupNodes referring to
+ // it.
+ unsigned idx = GroupNodes.size();
+ GroupNodes.push_back(idx);
+ GroupNodeIndices[Reg] = idx;
+ return idx;
+}
+
+bool AggressiveAntiDepState::IsLive(unsigned Reg)
+{
+ // KillIndex must be defined and DefIndex not defined for a register
+ // to be live.
+ return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
+}
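// Editorial illustration (not part of this patch) of the encoding checked
// above. The scanner walks a block bottom-up while instructions keep their
// top-down indices, e.g. for
//
//   i0:  R1 = ...        ; def of R1
//   i2:  ... = use R1    ; last use (kill) of R1
//
// visiting i2 records KillIndices[R1] = 2 and DefIndices[R1] = ~0u, making
// IsLive(R1) true between the def and the kill; visiting i0 later sets
// DefIndices[R1] = 0, so R1 is no longer considered live above its def.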
+
+AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
+ MachineFunction &MFi, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
+ : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+ TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ State(nullptr) {
+ /* Collect a bitset of all registers that are only broken if they
+ are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ dbgs() << " " << TRI->getName(r));
+ DEBUG(dbgs() << '\n');
+}
+
+AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
+ delete State;
+}
+
+void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ assert(!State);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
+
+ bool IsReturnBlock = BB->isReturnBlock();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (const auto &LI : (*SI)->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In non-return this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(MF);
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::FinishBlock() {
+ delete State;
+ State = nullptr;
+}
+
+void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+ PrescanInstruction(MI, Count, PassthruRegs);
+ ScanInstruction(MI, Count);
+
+ DEBUG(dbgs() << "Observe: ");
+ DEBUG(MI.dump());
+ DEBUG(dbgs() << "\tRegs:");
+
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled). If it is not live but was defined in the
+ // previous schedule region, then set its def index to the most
+ // conservative location (i.e. the beginning of the previous
+ // schedule region).
+ if (State->IsLive(Reg)) {
+ DEBUG(if (State->GetGroup(Reg) != 0)
+ dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg) << "->g0(region live-out)");
+ State->UnionGroups(Reg, 0);
+ } else if ((DefIndices[Reg] < InsertPosIndex)
+ && (DefIndices[Reg] >= Count)) {
+ DefIndices[Reg] = Count;
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
+bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
+ MachineOperand &MO) {
+ if (!MO.isReg() || !MO.isImplicit())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ return false;
+
+ MachineOperand *Op = nullptr;
+ if (MO.isDef())
+ Op = MI.findRegisterUseOperand(Reg, true);
+ else
+ Op = MI.findRegisterDefOperand(Reg);
+
+ return(Op && Op->isImplicit());
+}
+
+void AggressiveAntiDepBreaker::GetPassthruRegs(
+ MachineInstr &MI, std::set<unsigned> &PassthruRegs) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
+ IsImplicitDefUse(MI, MO)) {
+ const unsigned Reg = MO.getReg();
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ PassthruRegs.insert(*SubRegs);
+ }
+ }
+}
+
+/// AntiDepEdges - Return in Edges the anti- and output- dependencies
+/// in SU that we want to consider for breaking.
+static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
+ SmallSet<unsigned, 4> RegSet;
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
+ if (RegSet.insert(P->getReg()).second)
+ Edges.push_back(&*P);
+ }
+ }
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SUnit *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = nullptr;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU) {
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : nullptr;
+}
+
+void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
+ const char *tag,
+ const char *header,
+ const char *footer) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // FIXME: We must leave subregisters of live super registers as live, so that
+ // we don't clear out the register tracking information for subregisters of
+ // super registers we're still tracking (and with which we're unioning
+ // subregister definitions).
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) {
+ DEBUG(if (!header && footer) dbgs() << footer);
+ return;
+ }
+
+ if (!State->IsLive(Reg)) {
+ KillIndices[Reg] = KillIdx;
+ DefIndices[Reg] = ~0u;
+ RegRefs.erase(Reg);
+ State->LeaveGroup(Reg);
+ DEBUG(if (header) {
+ dbgs() << header << TRI->getName(Reg); header = nullptr; });
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
+ // Repeat for subregisters. Note that we only do this if the superregister
+ // was not live because otherwise, regardless of whether we have an explicit
+ // use of the subregister, the subregister's contents are needed for the
+ // uses of the superregister.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubregReg = *SubRegs;
+ if (!State->IsLive(SubregReg)) {
+ KillIndices[SubregReg] = KillIdx;
+ DefIndices[SubregReg] = ~0u;
+ RegRefs.erase(SubregReg);
+ State->LeaveGroup(SubregReg);
+ DEBUG(if (header) {
+ dbgs() << header << TRI->getName(Reg); header = nullptr; });
+ DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
+ State->GetGroup(SubregReg) << tag);
+ }
+ }
+ }
+
+ DEBUG(if (!header && footer) dbgs() << footer);
+}
+
+void AggressiveAntiDepBreaker::PrescanInstruction(
+ MachineInstr &MI, unsigned Count, std::set<unsigned> &PassthruRegs) {
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Handle dead defs by simulating a last-use of the register just
+ // after the def. A dead def can occur because the def is truly
+ // dead, or because only a subregister is live at the def. If we
+ // don't do this the dead def will be incorrectly merged into the
+ // previous def.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
+ }
+
+ DEBUG(dbgs() << "\tDef Groups:");
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI). Inline assembly may
+ // reference either system calls or the register directly. Skip it until we
+ // can tell user specified registers from compiler-specified.
+ if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) ||
+ MI.isInlineAsm()) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Any aliases that are live at this point are completely or
+ // partially defined here, so group those aliases with Reg.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg)) {
+ State->UnionGroups(Reg, AliasReg);
+ DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
+ TRI->getName(AliasReg) << ")");
+ }
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = nullptr;
+ if (i < MI.getDesc().getNumOperands())
+ RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Scan the register defs for this instruction and update
+ // live-ranges.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ // Ignore KILLs and passthru registers for liveness...
+ if (MI.isKill() || (PassthruRegs.count(Reg) != 0))
+ continue;
+
+ // Update def for Reg and aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ // We need to be careful here not to define already-live super registers.
+ // If the super register is already live, then this definition is not
+ // a definition of the whole super register (just a partial insertion
+ // into it). Earlier subregister definitions (which we've not yet visited
+ // because we're iterating bottom-up) need to be linked to the same group
+ // as this definition.
+ if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI))
+ continue;
+
+ DefIndices[*AI] = Count;
+ }
+ }
+}
+
+void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
+ unsigned Count) {
+ DEBUG(dbgs() << "\tUse Groups:");
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // If MI's uses have special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // Inline Assembly register uses also cannot be safely changed.
+ // FIXME: The issue with predicated instructions is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI.isCall() || MI.hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI) || MI.isInlineAsm();
+
+ // Scan the register uses for this instruction and update
+ // live-ranges, groups and RegRefs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+ State->GetGroup(Reg));
+
+ // If it wasn't previously live but now it is, this is a kill. Forget
+ // the previous live-range information and start a new live-range
+ // for the register.
+ HandleLastUse(Reg, Count, "(last-use)");
+
+ if (Special) {
+ DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
+ State->UnionGroups(Reg, 0);
+ }
+
+ // Note register reference...
+ const TargetRegisterClass *RC = nullptr;
+ if (i < MI.getDesc().getNumOperands())
+ RC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+ AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
+ RegRefs.insert(std::make_pair(Reg, RR));
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Form a group of all defs and uses of a KILL instruction to ensure
+ // that all registers are renamed as a group.
+ if (MI.isKill()) {
+ DEBUG(dbgs() << "\tKill Group:");
+
+ unsigned FirstReg = 0;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (FirstReg != 0) {
+ DEBUG(dbgs() << "=" << TRI->getName(Reg));
+ State->UnionGroups(FirstReg, Reg);
+ } else {
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ FirstReg = Reg;
+ }
+ }
+
+ DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
+ }
+}
+
+BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
+ BitVector BV(TRI->getNumRegs(), false);
+ bool first = true;
+
+ // Check all references that need rewriting for Reg. For each, use
+ // the corresponding register class to narrow the set of registers
+ // that are appropriate for renaming.
+ for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) {
+ const TargetRegisterClass *RC = Q.second.RC;
+ if (!RC) continue;
+
+ BitVector RCBV = TRI->getAllocatableSet(MF, RC);
+ if (first) {
+ BV |= RCBV;
+ first = false;
+ } else {
+ BV &= RCBV;
+ }
+
+ DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
+ }
+
+ return BV;
+}
+
+bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
+ unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap) {
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
+ std::vector<unsigned> Regs;
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
+ assert(Regs.size() > 0 && "Empty register group!");
+ if (Regs.size() == 0)
+ return false;
+
+ // Find the "superest" register in the group. At the same time,
+ // collect the BitVector of registers that can be used to rename
+ // each register.
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+ << ":\n");
+ std::map<unsigned, BitVector> RenameRegisterMap;
+ unsigned SuperReg = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg))
+ SuperReg = Reg;
+
+ // If Reg has any references, then collect possible rename regs
+ if (RegRefs.count(Reg) > 0) {
+ DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
+
+ BitVector &BV = RenameRegisterMap[Reg];
+ assert(BV.empty());
+ BV = GetRenameRegisters(Reg);
+
+ DEBUG({
+ dbgs() << " ::";
+ for (int r = BV.find_first(); r != -1; r = BV.find_next(r))
+ dbgs() << " " << TRI->getName(r);
+ dbgs() << "\n";
+ });
+ }
+ }
+
+ // All group registers should be a subreg of SuperReg.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ if (Reg == SuperReg) continue;
+ bool IsSub = TRI->isSubRegister(SuperReg, Reg);
+ // FIXME: remove this once PR18663 has been properly fixed. For now,
+ // return a conservative answer:
+ // assert(IsSub && "Expecting group subregister");
+ if (!IsSub)
+ return false;
+ }
+
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int renamecnt = 0;
+ if (renamecnt++ % DebugDiv != DebugMod)
+ return false;
+
+ dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
+ " for debug ***\n";
+ }
+#endif
+
+ // Check each possible rename register for SuperReg in round-robin
+ // order. If that register is available, and the corresponding
+ // registers are available for the other group subregisters, then we
+ // can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
+ const TargetRegisterClass *SuperRC =
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
+
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
+ DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tFind Registers:");
+
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
+ do {
+ if (R == 0) R = Order.size();
+ --R;
+ const unsigned NewSuperReg = Order[R];
+ // Don't consider non-allocatable registers
+ if (!MRI.isAllocatable(NewSuperReg)) continue;
+ // Don't replace a register with itself.
+ if (NewSuperReg == SuperReg) continue;
+
+ DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
+ RenameMap.clear();
+
+ // For each referenced group register (which must be a SuperReg or
+ // a subregister of SuperReg), find the corresponding subregister
+ // of NewSuperReg and make sure it is free to be renamed.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ unsigned Reg = Regs[i];
+ unsigned NewReg = 0;
+ if (Reg == SuperReg) {
+ NewReg = NewSuperReg;
+ } else {
+ unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg);
+ if (NewSubRegIdx != 0)
+ NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx);
+ }
+
+ DEBUG(dbgs() << " " << TRI->getName(NewReg));
+
+ // Check if Reg can be renamed to NewReg.
+ if (!RenameRegisterMap[Reg].test(NewReg)) {
+ DEBUG(dbgs() << "(no rename)");
+ goto next_super_reg;
+ }
+
+ // If NewReg is dead and NewReg's most recent def is not before
+ // Reg's kill, it's safe to replace Reg with NewReg. We
+ // must also check all aliases of NewReg, because we can't define a
+ // register when any sub or super is already live.
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) {
+ DEBUG(dbgs() << "(live)");
+ goto next_super_reg;
+ } else {
+ bool found = false;
+ for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (State->IsLive(AliasReg) ||
+ (KillIndices[Reg] > DefIndices[AliasReg])) {
+ DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ goto next_super_reg;
+ }
+
+ // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also
+ // defines 'NewReg' via an early-clobber operand.
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ MachineInstr *UseMI = Q.second.Operand->getParent();
+ int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
+ if (Idx == -1)
+ continue;
+
+ if (UseMI->getOperand(Idx).isEarlyClobber()) {
+ DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
+ // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining
+ // 'Reg' is an early-clobber define and that instruction also uses
+ // 'NewReg'.
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) {
+ if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber())
+ continue;
+
+ MachineInstr *DefMI = Q.second.Operand->getParent();
+ if (DefMI->readsRegister(NewReg, TRI)) {
+ DEBUG(dbgs() << "(ec)");
+ goto next_super_reg;
+ }
+ }
+
+ // Record that 'Reg' can be renamed to 'NewReg'.
+ RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
+ }
+
+ // If we fall-out here, then every register in the group can be
+ // renamed, as recorded in RenameMap.
+ RenameOrder.erase(SuperRC);
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, R));
+ DEBUG(dbgs() << "]\n");
+ return true;
+
+ next_super_reg:
+ DEBUG(dbgs() << ']');
+ } while (R != EndR);
+
+ DEBUG(dbgs() << '\n');
+
+ // No registers are free and available!
+ return false;
+}
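// Editorial aside, not part of this patch: the RenameOrder bookkeeping above
// walks each register class's allocation order as a wrapping, backwards
// round-robin, so successive renames are spread across the class instead of
// always reusing the first free register. A stripped-down sketch of just that
// cursor, with hypothetical names:
#include <vector>

template <typename UsablePred>
int roundRobinPick(const std::vector<unsigned> &Order, unsigned &SavedPos,
                   UsablePred IsUsable) {
  unsigned OrigR = SavedPos; // Initialized to Order.size() the first time.
  unsigned EndR = (OrigR == Order.size()) ? 0 : OrigR;
  unsigned R = OrigR;
  do {
    if (R == 0) R = Order.size();
    --R;
    if (IsUsable(Order[R])) {
      SavedPos = R;          // The next call resumes just below this position.
      return static_cast<int>(R);
    }
  } while (R != EndR);
  return -1;                 // Wrapped all the way around: nothing usable.
}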
+
+/// BreakAntiDependencies - Identify anti-dependencies within the
+/// ScheduleDAG and break them by renaming registers.
+///
+unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
+ const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
+ RegRefs = State->GetRegRefs();
+
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // For each regclass the next register to use for renaming.
+ RenameOrderType RenameOrder;
+
+ // ...need a map from MI to SUnit.
+ std::map<MachineInstr *, const SUnit *> MISUnitMap;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
+ SU));
+ }
+
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ const SUnit *CriticalPathSU = nullptr;
+ MachineInstr *CriticalPathMI = nullptr;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (!State->IsLive(Reg))
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+#endif
+
+ BitVector RegAliases(TRI->getNumRegs());
+
+ // Attempt to break anti-dependence edges. Walk the instructions
+ // from the bottom up, tracking information about liveness as we go
+ // to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin;
+ I != E; --Count) {
+ MachineInstr &MI = *--I;
+
+ if (MI.isDebugValue())
+ continue;
+
+ DEBUG(dbgs() << "Anti: ");
+ DEBUG(MI.dump());
+
+ std::set<unsigned> PassthruRegs;
+ GetPassthruRegs(MI, PassthruRegs);
+
+ // Process the defs in MI...
+ PrescanInstruction(MI, Count, PassthruRegs);
+
+ // The dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
+ std::vector<const SDep *> Edges;
+ const SUnit *PathSU = MISUnitMap[&MI];
+ AntiDepEdges(PathSU, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = nullptr;
+ if (&MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr;
+ } else if (CriticalPathSet.any()) {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
+ // Ignore KILL instructions (they form a group in ScanInstruction
+ // but don't cause any anti-dependence breaking themselves)
+ if (!MI.isKill()) {
+ // Attempt to break each anti-dependency...
+ for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+ const SDep *Edge = Edges[i];
+ SUnit *NextSU = Edge->getSUnit();
+
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
+
+ unsigned AntiDepReg = Edge->getReg();
+ DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+ if (!MRI.isAllocatable(AntiDepReg)) {
+ // Don't break anti-dependencies on non-allocatable registers.
+ DEBUG(dbgs() << " (non-allocatable)\n");
+ continue;
+ } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(dbgs() << " (not critical-path)\n");
+ continue;
+ } else if (PassthruRegs.count(AntiDepReg) != 0) {
+ // If the anti-dep register liveness "passes-thru", then
+ // don't try to change it. It will be changed along with
+ // the use if required to break an earlier antidep.
+ DEBUG(dbgs() << " (passthru)\n");
+ continue;
+ } else {
+ // No anti-dep breaking for implicit deps
+ MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg);
+ assert(AntiDepOp && "Can't find index for defined register operand");
+ if (!AntiDepOp || AntiDepOp->isImplicit()) {
+ DEBUG(dbgs() << " (implicit)\n");
+ continue;
+ }
+
+ // If the SUnit has other dependencies on the SUnit that
+ // it anti-depends on, don't bother breaking the
+ // anti-dependency since those edges would prevent such
+ // units from being scheduled past each other
+ // regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+ PE = PathSU->Preds.end(); P != PE; ++P) {
+ if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+ (P->getKind() != SDep::Output)) {
+ DEBUG(dbgs() << " (real dependency)\n");
+ AntiDepReg = 0;
+ break;
+ } else if ((P->getSUnit() != NextSU) &&
+ (P->getKind() == SDep::Data) &&
+ (P->getReg() == AntiDepReg)) {
+ DEBUG(dbgs() << " (other dependency)\n");
+ AntiDepReg = 0;
+ break;
+ }
+ }
+
+ if (AntiDepReg == 0) continue;
+
+ // If the definition of the anti-dependency register does not start
+ // a new live range, bail out. This can happen if the anti-dep
+ // register is a sub-register of another register whose live range
+ // spans over PathSU. In such a case, PathSU defines only a part of
+ // the larger register.
+ RegAliases.reset();
+ for (MCRegAliasIterator AI(AntiDepReg, TRI, true); AI.isValid(); ++AI)
+ RegAliases.set(*AI);
+ for (SDep S : PathSU->Succs) {
+ SDep::Kind K = S.getKind();
+ if (K != SDep::Data && K != SDep::Output && K != SDep::Anti)
+ continue;
+ unsigned R = S.getReg();
+ if (!RegAliases[R])
+ continue;
+ if (R == AntiDepReg || TRI->isSubRegister(AntiDepReg, R))
+ continue;
+ AntiDepReg = 0;
+ break;
+ }
+
+ if (AntiDepReg == 0) continue;
+ }
+
+ assert(AntiDepReg != 0);
+ if (AntiDepReg == 0) continue;
+
+ // Determine AntiDepReg's register group.
+ const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+ if (GroupIndex == 0) {
+ DEBUG(dbgs() << " (zero group)\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ // Look for a suitable register to use to break the anti-dependence.
+ std::map<unsigned, unsigned> RenameMap;
+ if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+ DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg) << ":");
+
+ // Handle each group register...
+ for (std::map<unsigned, unsigned>::iterator
+ S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+ unsigned CurrReg = S->first;
+ unsigned NewReg = S->second;
+
+ DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+ TRI->getName(NewReg) << "(" <<
+ RegRefs.count(CurrReg) << " refs)");
+
+ // Update the references to the old register CurrReg to
+ // refer to the new register NewReg.
+ for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) {
+ Q.second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q.second.Operand->getParent())
+ UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for CurrReg is now inconsistent. Set
+ // the state as if it were dead.
+ State->UnionGroups(NewReg, 0);
+ RegRefs.erase(NewReg);
+ DefIndices[NewReg] = DefIndices[CurrReg];
+ KillIndices[NewReg] = KillIndices[CurrReg];
+
+ State->UnionGroups(CurrReg, 0);
+ RegRefs.erase(CurrReg);
+ DefIndices[CurrReg] = KillIndices[CurrReg];
+ KillIndices[CurrReg] = ~0u;
+ assert(((KillIndices[CurrReg] == ~0u) !=
+ (DefIndices[CurrReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+ }
+
+ ++Broken;
+ DEBUG(dbgs() << '\n');
+ }
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
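For orientation, the four entry points implemented above (StartBlock, BreakAntiDependencies, Observe, FinishBlock) are driven by the post-RA list scheduler (PostRASchedulerList.cpp, also added in this commit). The following is only a hedged, schematic sketch of that driver loop; the surrounding names are simplified and the real pass keeps considerably more state:

#include "AggressiveAntiDepBreaker.h"
using namespace llvm;

// Schematic driver (editorial sketch, not the actual pass): run an
// AntiDepBreaker over one basic block whose instructions have already been
// turned into SUnits for scheduling.
static void breakAntiDepsInBlock(MachineBasicBlock &MBB, AntiDepBreaker &ADB,
                                 std::vector<SUnit> &SUnits,
                                 AntiDepBreaker::DbgValueVector &DbgValues) {
  ADB.StartBlock(&MBB);            // Seed liveness from successors' live-ins
                                   // and callee-saved registers.
  unsigned Broken = ADB.BreakAntiDependencies(
      SUnits, MBB.begin(), MBB.end(), /*InsertPosIndex=*/MBB.size(),
      DbgValues);                  // Rename registers bottom-up.
  (void)Broken;                    // The real pass rebuilds liveness and the
                                   // dependence graph when Broken != 0.
  // Instructions outside the scheduling region would be shown to the breaker
  // with ADB.Observe(MI, Count, InsertPosIndex) to keep liveness consistent.
  ADB.FinishBlock();               // Drop the per-block state.
}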
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 000000000000..f97e6666b219
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,179 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
+
+namespace llvm {
+class RegisterClassInfo;
+
+ /// Contains all the state necessary for anti-dep breaking.
+class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
+ public:
+ /// Information about a register reference within a live range
+ typedef struct {
+ /// The register's operand
+ MachineOperand *Operand;
+ /// The register class
+ const TargetRegisterClass *RC;
+ } RegisterReference;
+
+ private:
+ /// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
+ /// Implements a disjoint-union data structure to
+ /// form register groups. A node is represented by an index into
+ /// the vector. A node can "point to" itself to indicate that it
+ /// is the parent of a group, or point to another node to indicate
+ /// that it is a member of the same group as that node.
+ std::vector<unsigned> GroupNodes;
+
+ /// For each register, the index of the GroupNode
+ /// currently representing the group that the register belongs to.
+ /// Register 0 is always represented by the 0 group, a group
+ /// composed of registers that are not eligible for anti-aliasing.
+ std::vector<unsigned> GroupNodeIndices;
+
+ /// Map registers to all their references within a live range.
+ std::multimap<unsigned, RegisterReference> RegRefs;
+
+ /// The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// The index of the most recent complete def (proceeding bottom
+ /// up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ public:
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+ /// Return the kill indices.
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+ /// Return the define indices.
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+ /// Return the RegRefs map.
+ std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+ // Get the group for a register. The returned value is
+ // the index of the GroupNode representing the group.
+ unsigned GetGroup(unsigned Reg);
+
+ // Return a vector of the registers belonging to a group.
+ // If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned,
+ AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+ // Union Reg1's and Reg2's groups to form a new group.
+ // Return the index of the GroupNode representing the group.
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+ // Remove a register from its current group and place
+ // it alone in its own group. Return the index of the GroupNode
+ // representing the register's new group.
+ unsigned LeaveGroup(unsigned Reg);
+
+ /// Return true if Reg is live.
+ bool IsLive(unsigned Reg);
+ };
+
+ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker
+ : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
+ /// The state used to identify and rename anti-dependence registers.
+ AggressiveAntiDepState *State;
+
+ public:
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ ~AggressiveAntiDepBreaker() override;
+
+ /// Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB) override;
+
+ /// Identify anti-dependencies along the critical path
+ /// of the ScheduleDAG and break them by renaming registers.
+ ///
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) override;
+
+ /// Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) override;
+
+ /// Finish anti-dep breaking for a basic block.
+ void FinishBlock() override;
+
+ private:
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
+
+ /// Return true if MO represents a register
+ /// that is both implicitly used and defined in MI
+ bool IsImplicitDefUse(MachineInstr &MI, MachineOperand &MO);
+
+ /// If MI implicitly def/uses a register, then
+ /// return that register and all subregisters.
+ void GetPassthruRegs(MachineInstr &MI, std::set<unsigned> &PassthruRegs);
+
+ void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+ const char *header = nullptr,
+ const char *footer = nullptr);
+
+ void PrescanInstruction(MachineInstr &MI, unsigned Count,
+ std::set<unsigned> &PassthruRegs);
+ void ScanInstruction(MachineInstr &MI, unsigned Count);
+ BitVector GetRenameRegisters(unsigned Reg);
+ bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+ RenameOrderType& RenameOrder,
+ std::map<unsigned, unsigned> &RenameMap);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..40451c0d6c19
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix)
+ : Pos(0) {
+ const MachineFunction &MF = VRM.getMachineFunction();
+ const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+ Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
+ rewind();
+
+ DEBUG({
+ if (!Hints.empty()) {
+ dbgs() << "hints:";
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ dbgs() << ' ' << PrintReg(Hints[I], TRI);
+ dbgs() << '\n';
+ }
+ });
+#ifndef NDEBUG
+ for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+ assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ "Target hint is outside allocation order.");
+#endif
+}
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..2aee3a63a2b1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,89 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+
+class RegisterClassInfo;
+class VirtRegMap;
+class LiveRegMatrix;
+
+class LLVM_LIBRARY_VISIBILITY AllocationOrder {
+ SmallVector<MCPhysReg, 16> Hints;
+ ArrayRef<MCPhysReg> Order;
+ int Pos;
+
+public:
+ /// Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param RegClassInfo Information about reserved and allocatable registers.
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix);
+
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
+
+ /// Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next() again after it has returned 0; it will keep
+ /// returning 0 until rewind() is called.
+ unsigned next(unsigned Limit = 0) {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ if (!Limit)
+ Limit = Order.size();
+ while (Pos < int(Limit)) {
+ unsigned Reg = Order[Pos++];
+ if (!isHint(Reg))
+ return Reg;
+ }
+ return 0;
+ }
+
+ /// As next(), but allow duplicates to be returned, and stop before the
+ /// Limit'th register in the RegisterClassInfo allocation order.
+ ///
+ /// This can produce more than Limit registers if there are hints.
+ unsigned nextWithDups(unsigned Limit) {
+ if (Pos < 0)
+ return Hints.end()[Pos++];
+ if (Pos < int(Limit))
+ return Order[Pos++];
+ return 0;
+ }
+
+ /// Start over from the beginning.
+ void rewind() { Pos = -int(Hints.size()); }
+
+ /// Return true if the last register returned from next() was a preferred register.
+ bool isHint() const { return Pos <= 0; }
+
+ /// Return true if PhysReg is a preferred register.
+ bool isHint(unsigned PhysReg) const {
+ return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
+ }
+};
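+
+// A minimal usage sketch, assuming VirtReg, VRM, RegClassInfo and Matrix are
+// provided by the register allocator driving the query:
+//
+//   AllocationOrder Order(VirtReg, VRM, RegClassInfo, Matrix);
+//   while (unsigned PhysReg = Order.next()) {
+//     // Hinted registers come back first; Order.isHint() is true for them.
+//     // Try to assign PhysReg here, or keep iterating.
+//   }
+//   Order.rewind(); // start over, e.g. for a second round that may evict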
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
new file mode 100644
index 000000000000..d69073458cdf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,731 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
+using namespace llvm;
+
+/// Compute the linearized index of a member in a nested aggregate/struct/array
+/// by recursing and accumulating CurIndex as long as there are indices in the
+/// index list.
+unsigned llvm::ComputeLinearIndex(Type *Ty,
+ const unsigned *Indices,
+ const unsigned *IndicesEnd,
+ unsigned CurIndex) {
+ // Base case: We're done.
+ if (Indices && Indices == IndicesEnd)
+ return CurIndex;
+
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI) {
+ if (Indices && *Indices == unsigned(EI - EB))
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
+ }
+ assert(!Indices && "Unexpected out of bound");
+ return CurIndex;
+ }
+ // Given an array type, recursively traverse the elements.
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ unsigned NumElts = ATy->getNumElements();
+ // Compute the Linear offset when jumping one element of the array
+ unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
+ if (Indices) {
+ assert(*Indices < NumElts && "Unexpected out of bound");
+ // If the index is inside the array, compute the index to the requested
+ // elt and recurse inside the element with the end of the indices list
+ CurIndex += EltLinearOffset* *Indices;
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ }
+ CurIndex += EltLinearOffset*NumElts;
+ return CurIndex;
+ }
+ // We haven't found the type we're looking for, so keep searching.
+ return CurIndex + 1;
+}
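+
+// Worked example (a sketch; STy stands for the struct type named here): for
+// { i32, [2 x float], i64 } the flattened scalar order is i32(0), float(1),
+// float(2), i64(3), so the index list {2}, naming the i64 member,
+// linearizes to 3:
+//
+//   unsigned Idx[] = {2};
+//   unsigned Linear = ComputeLinearIndex(STy, Idx, Idx + 1, 0); // == 3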
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ for (StructType::element_iterator EB = STy->element_begin(),
+ EI = EB,
+ EE = STy->element_end();
+ EI != EE; ++EI)
+ ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
+ StartingOffset + SL->getElementOffset(EI - EB));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty->isVoidTy())
+ return;
+ // Base case: we can get an EVT for this LLVM IR type.
+ ValueVTs.push_back(TLI.getValueType(DL, Ty));
+ if (Offsets)
+ Offsets->push_back(StartingOffset);
+}
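+
+// Worked example (a sketch, assuming a DataLayout in which i32 and float are
+// both 4 bytes and the struct needs no padding; TLI, DL and STy stand for the
+// caller's lowering, layout and the struct type { i32, [2 x float] }):
+//
+//   SmallVector<EVT, 4> VTs;
+//   SmallVector<uint64_t, 4> Offs;
+//   ComputeValueVTs(TLI, DL, STy, VTs, &Offs);
+//   // VTs  == { i32, f32, f32 }
+//   // Offs == { 0, 4, 8 }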
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalValue *llvm::ExtractTypeInfo(Value *V) {
+ V = V->stripPointerCasts();
+ GlobalValue *GV = dyn_cast<GlobalValue>(V);
+ GlobalVariable *Var = dyn_cast<GlobalVariable>(V);
+
+ if (Var && Var->getName() == "llvm.eh.catch.all.value") {
+ assert(Var->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = Var->getInitializer();
+ GV = dyn_cast<GlobalValue>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
+ return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+ const TargetLowering &TLI) {
+ for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+ InlineAsm::ConstraintInfo &CI = CInfos[i];
+ for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+ if (CType == TargetLowering::C_Memory)
+ return true;
+ }
+
+ // Indirect operand accesses access memory.
+ if (CI.isIndirect)
+ return true;
+ }
+
+ return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+ switch (Pred) {
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
+ }
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+ switch (Pred) {
+ case ICmpInst::ICMP_EQ: return ISD::SETEQ;
+ case ICmpInst::ICMP_NE: return ISD::SETNE;
+ case ICmpInst::ICMP_SLE: return ISD::SETLE;
+ case ICmpInst::ICMP_ULE: return ISD::SETULE;
+ case ICmpInst::ICMP_SGE: return ISD::SETGE;
+ case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+ case ICmpInst::ICMP_SLT: return ISD::SETLT;
+ case ICmpInst::ICMP_ULT: return ISD::SETULT;
+ case ICmpInst::ICMP_SGT: return ISD::SETGT;
+ case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+ default:
+ llvm_unreachable("Invalid ICmp predicate opcode!");
+ }
+}
+
+static bool isNoopBitcast(Type *T1, Type *T2,
+ const TargetLoweringBase& TLI) {
+ return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
+ (isa<VectorType>(T1) && isa<VectorType>(T2) &&
+ TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
+}
+
+/// Look through operations that will be free to find the earliest source of
+/// this value.
+///
+/// @param ValLoc If V has aggregate type, we will be interested in a particular
+/// scalar component. This records its address; the reverse of this list gives a
+/// sequence of indices appropriate for an extractvalue to locate the important
+/// value. This value is updated during the function and on exit will indicate
+/// similar information for the Value returned.
+///
+/// @param DataBits If this function looks through truncate instructions, this
+/// will record the smallest size attained.
+static const Value *getNoopInput(const Value *V,
+ SmallVectorImpl<unsigned> &ValLoc,
+ unsigned &DataBits,
+ const TargetLoweringBase &TLI,
+ const DataLayout &DL) {
+ while (true) {
+ // Try to look through V; if V is not an instruction, it can't be looked
+ // through.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || I->getNumOperands() == 0) return V;
+ const Value *NoopInput = nullptr;
+
+ Value *Op = I->getOperand(0);
+ if (isa<BitCastInst>(I)) {
+ // Look through truly no-op bitcasts.
+ if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+ NoopInput = Op;
+ } else if (isa<GetElementPtrInst>(I)) {
+ // Look through getelementptr
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ NoopInput = Op;
+ } else if (isa<IntToPtrInst>(I)) {
+ // Look through inttoptr.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ DL.getPointerSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<PtrToIntInst>(I)) {
+ // Look through ptrtoint.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ DL.getPointerSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<TruncInst>(I) &&
+ TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
+ DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
+ NoopInput = Op;
+ } else if (isa<CallInst>(I)) {
+ // Look through call (skipping callee)
+ for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
+ i != e; ++i) {
+ unsigned attrInd = i - I->op_begin() + 1;
+ if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+ isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+ NoopInput = *i;
+ break;
+ }
+ }
+ } else if (isa<InvokeInst>(I)) {
+ // Look through invoke (skipping BB, BB, Callee)
+ for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
+ i != e; ++i) {
+ unsigned attrInd = i - I->op_begin() + 1;
+ if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+ isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+ NoopInput = *i;
+ break;
+ }
+ }
+ } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
+ // Value may come from either the aggregate or the scalar
+ ArrayRef<unsigned> InsertLoc = IVI->getIndices();
+ if (ValLoc.size() >= InsertLoc.size() &&
+ std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) {
+ // The type being inserted is a nested sub-type of the aggregate; we
+ // have to remove those initial indices to get the location we're
+ // interested in for the operand.
+ ValLoc.resize(ValLoc.size() - InsertLoc.size());
+ NoopInput = IVI->getInsertedValueOperand();
+ } else {
+ // The struct we're inserting into has the value we're interested in, no
+ // change of address.
+ NoopInput = Op;
+ }
+ } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+ // The part we're interested in will inevitably be some sub-section of the
+ // previous aggregate. Combine the two paths to obtain the true address of
+ // our element.
+ ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
+ ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
+ NoopInput = Op;
+ }
+ // Terminate if we couldn't find anything to look through.
+ if (!NoopInput)
+ return V;
+
+ V = NoopInput;
+ }
+}
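+
+// Worked example of the ValLoc convention (a sketch; V, TLI and DL stand for
+// the caller's value, lowering and layout): if the scalar of interest sits at
+// extractvalue indices {1, 0} of the aggregate, the caller passes the
+// reversed path {0, 1}, and getNoopInput keeps that path consistent while it
+// strips no-op bitcasts, all-zero GEPs, insertvalues and extractvalues:
+//
+//   SmallVector<unsigned, 4> ValLoc = {0, 1}; // reversed form of {1, 0}
+//   unsigned Bits = UINT_MAX;
+//   const Value *Src = getNoopInput(V, ValLoc, Bits, TLI, DL);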
+
+/// Return true if this scalar return value only has bits discarded on its path
+/// from the "tail call" to the "ret". This includes the obvious noop
+/// instructions handled by getNoopInput above as well as free truncations (or
+/// extensions prior to the call).
+static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
+ SmallVectorImpl<unsigned> &RetIndices,
+ SmallVectorImpl<unsigned> &CallIndices,
+ bool AllowDifferingSizes,
+ const TargetLoweringBase &TLI,
+ const DataLayout &DL) {
+
+ // Trace the sub-value needed by the return value as far back up the graph as
+ // possible, in the hope that it will intersect with the value produced by the
+ // call. In the simple case with no "returned" attribute, the hope is actually
+ // that we end up back at the tail call instruction itself.
+ unsigned BitsRequired = UINT_MAX;
+ RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL);
+
+ // If this slot in the value returned is undef, it doesn't matter what the
+ // call puts there, it'll be fine.
+ if (isa<UndefValue>(RetVal))
+ return true;
+
+ // Now do a similar search up through the graph to find where the value
+ // actually returned by the "tail call" comes from. In the simple case without
+ // a "returned" attribute, the search is blocked immediately and the loop is
+ // a no-op.
+ unsigned BitsProvided = UINT_MAX;
+ CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL);
+
+ // There's no hope if we can't actually trace them to (the same part of!) the
+ // same value.
+ if (CallVal != RetVal || CallIndices != RetIndices)
+ return false;
+
+ // However, intervening truncates may have made the call non-tail. Make sure
+ // all the bits that are needed by the "ret" have been provided by the "tail
+ // call". FIXME: with sufficiently cunning bit-tracking, we could look through
+ // extensions too.
+ if (BitsProvided < BitsRequired ||
+ (!AllowDifferingSizes && BitsProvided != BitsRequired))
+ return false;
+
+ return true;
+}
+
+/// For an aggregate type, determine whether a given index is within bounds or
+/// not.
+static bool indexReallyValid(CompositeType *T, unsigned Idx) {
+ if (ArrayType *AT = dyn_cast<ArrayType>(T))
+ return Idx < AT->getNumElements();
+
+ return Idx < cast<StructType>(T)->getNumElements();
+}
+
+/// Move the given iterators to the next leaf type in depth first traversal.
+///
+/// Performs a depth-first traversal of the type as specified by its arguments,
+/// stopping at the next leaf node (which may be a legitimate scalar type or an
+/// empty struct or array).
+///
+/// @param SubTypes List of the partial components making up the type from
+/// outermost to innermost non-empty aggregate. The element currently
+/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
+///
+/// @param Path Set of extractvalue indices leading from the outermost type
+/// (SubTypes[0]) to the leaf node currently represented.
+///
+/// @returns true if a new type was found, false otherwise. Calling this
+/// function again on a finished iterator will repeatedly return
+/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
+/// aggregate or a non-aggregate.
+static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
+ // First march back up the tree until we can successfully increment one of the
+ // coordinates in Path.
+ while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
+ Path.pop_back();
+ SubTypes.pop_back();
+ }
+
+ // If we reached the top, then the iterator is done.
+ if (Path.empty())
+ return false;
+
+ // We know there's *some* valid leaf now, so march back down the tree picking
+ // out the left-most element at each node.
+ ++Path.back();
+ Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
+ while (DeeperType->isAggregateType()) {
+ CompositeType *CT = cast<CompositeType>(DeeperType);
+ if (!indexReallyValid(CT, 0))
+ return true;
+
+ SubTypes.push_back(CT);
+ Path.push_back(0);
+
+ DeeperType = CT->getTypeAtIndex(0U);
+ }
+
+ return true;
+}
+
+/// Find the first non-empty, scalar-like type in Next and setup the iterator
+/// components.
+///
+/// Assuming Next is an aggregate of some kind, this function will traverse the
+/// tree from left to right (i.e. depth-first) looking for the first
+/// non-aggregate type which will play a role in function return.
+///
+/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
+/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
+/// i32 in that type.
+static bool firstRealType(Type *Next,
+ SmallVectorImpl<CompositeType *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
+ // First initialise the iterator components to the first "leaf" node
+ // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
+ // despite nominally being an aggregate).
+ while (Next->isAggregateType() &&
+ indexReallyValid(cast<CompositeType>(Next), 0)) {
+ SubTypes.push_back(cast<CompositeType>(Next));
+ Path.push_back(0);
+ Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
+ }
+
+ // If there's no Path now, Next was originally scalar already (or empty
+ // leaf). We're done.
+ if (Path.empty())
+ return true;
+
+ // Otherwise, use normal iteration to keep looking through the tree until we
+ // find a non-aggregate type.
+ while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
+ if (!advanceToNextLeafType(SubTypes, Path))
+ return false;
+ }
+
+ return true;
+}
+
+/// Set the iterator data-structures to the next non-empty, non-aggregate
+/// subtype.
+static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
+ SmallVectorImpl<unsigned> &Path) {
+ do {
+ if (!advanceToNextLeafType(SubTypes, Path))
+ return false;
+
+ assert(!Path.empty() && "found a leaf but didn't set the path?");
+ } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());
+
+ return true;
+}
+
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
+ const Instruction *I = CS.getInstruction();
+ const BasicBlock *ExitBB = I->getParent();
+ const TerminatorInst *Term = ExitBB->getTerminator();
+ const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+
+ // The block must end in a return statement or unreachable.
+ //
+ // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+ // an unreachable, for now. The way tailcall optimization is currently
+ // implemented means it will add an epilogue followed by a jump. That is
+ // not profitable. Also, if the callee is a special function (e.g.
+ // longjmp on x86), it can end up causing miscompilation that has not
+ // been fully understood.
+ if (!Ret &&
+ (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
+ return false;
+
+ // If I will have a chain, make sure no other instruction that will have a
+ // chain interposes between I and the return.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(I))
+ for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
+ if (&*BBI == I)
+ break;
+ // Debug info intrinsics do not get in the way of tail call optimization.
+ if (isa<DbgInfoIntrinsic>(BBI))
+ continue;
+ if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+ !isSafeToSpeculativelyExecute(&*BBI))
+ return false;
+ }
+
+ const Function *F = ExitBB->getParent();
+ return returnTypeIsEligibleForTailCall(
+ F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
+}
+
+bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
+ const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI) {
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Make sure the attributes attached to each return are compatible.
+ AttrBuilder CallerAttrs(F->getAttributes(),
+ AttributeSet::ReturnIndex);
+ AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
+ AttributeSet::ReturnIndex);
+
+ // Noalias is completely benign as far as calling convention goes, it
+ // shouldn't affect whether the call is a tail call.
+ CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
+ CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);
+
+ bool AllowDifferingSizes = true;
+ if (CallerAttrs.contains(Attribute::ZExt)) {
+ if (!CalleeAttrs.contains(Attribute::ZExt))
+ return false;
+
+ AllowDifferingSizes = false;
+ CallerAttrs.removeAttribute(Attribute::ZExt);
+ CalleeAttrs.removeAttribute(Attribute::ZExt);
+ } else if (CallerAttrs.contains(Attribute::SExt)) {
+ if (!CalleeAttrs.contains(Attribute::SExt))
+ return false;
+
+ AllowDifferingSizes = false;
+ CallerAttrs.removeAttribute(Attribute::SExt);
+ CalleeAttrs.removeAttribute(Attribute::SExt);
+ }
+
+ // If they're still different, there's some facet we don't understand
+ // (currently only "inreg", but in future who knows). It may be OK but the
+ // only safe option is to reject the tail call.
+ if (CallerAttrs != CalleeAttrs)
+ return false;
+
+ const Value *RetVal = Ret->getOperand(0), *CallVal = I;
+ SmallVector<unsigned, 4> RetPath, CallPath;
+ SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
+
+ bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
+ bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);
+
+ // Nothing's actually returned; it doesn't matter what the callee put there,
+ // it's a valid tail call.
+ if (RetEmpty)
+ return true;
+
+ // Iterate pairwise through each of the value types making up the tail call
+ // and the corresponding return. For each one we want to know whether it's
+ // essentially going directly from the tail call to the ret, via operations
+ // that end up not generating any code.
+ //
+ // We allow a certain amount of covariance here. For example it's permitted
+ // for the tail call to define more bits than the ret actually cares about
+ // (e.g. via a truncate).
+ do {
+ if (CallEmpty) {
+ // We've exhausted the values produced by the tail call instruction; the
+ // rest are essentially undef. The type doesn't really matter, but we need
+ // *something*.
+ Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
+ CallVal = UndefValue::get(SlotType);
+ }
+
+ // The manipulations performed when we're looking through an insertvalue or
+ // an extractvalue would happen at the front of the RetPath list, so since
+ // we have to copy it anyway it's more efficient to create a reversed copy.
+ SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
+ SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
+
+ // Finally, we can check whether the value produced by the tail call at this
+ // index is compatible with the value we return.
+ if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath,
+ AllowDifferingSizes, TLI,
+ F->getParent()->getDataLayout()))
+ return false;
+
+ CallEmpty = !nextRealType(CallSubTypes, CallPath);
+ } while(nextRealType(RetSubTypes, RetPath));
+
+ return true;
+}
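+
+// Illustrative cases (a sketch of the attribute rules above): if exactly one
+// of the function's return and the call's return value carries zeroext (or
+// sext), the tail call is rejected; if both carry it, the value sizes must
+// match exactly. Without those attributes the ret may discard bits, e.g.
+// through a truncate that TLI.allowTruncateForTailCall reports as free.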
+
+bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
+ if (!GV->hasLinkOnceODRLinkage())
+ return false;
+
+ // We assume that anyone who sets global unnamed_addr on a non-constant knows
+ // what they're doing.
+ if (GV->hasGlobalUnnamedAddr())
+ return true;
+
+ // If it is a non-constant variable, it needs to be uniqued across shared
+ // objects.
+ if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
+ if (!Var->isConstant())
+ return false;
+ }
+
+ return GV->hasAtLeastLocalUnnamedAddr();
+}
+
+static void collectFuncletMembers(
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
+ const MachineBasicBlock *MBB) {
+ SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB};
+ while (!Worklist.empty()) {
+ const MachineBasicBlock *Visiting = Worklist.pop_back_val();
+ // Don't follow blocks which start new funclets.
+ if (Visiting->isEHPad() && Visiting != MBB)
+ continue;
+
+ // Add this MBB to our funclet.
+ auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet));
+
+ // Don't revisit blocks.
+ if (!P.second) {
+ assert(P.first->second == Funclet && "MBB is part of two funclets!");
+ continue;
+ }
+
+ // Returns are boundaries where funclet transfer can occur; don't follow
+ // successors.
+ if (Visiting->isReturnBlock())
+ continue;
+
+ for (const MachineBasicBlock *Succ : Visiting->successors())
+ Worklist.push_back(Succ);
+ }
+}
+
+DenseMap<const MachineBasicBlock *, int>
+llvm::getFuncletMembership(const MachineFunction &MF) {
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership;
+
+ // We don't have anything to do if there aren't any EH pads.
+ if (!MF.getMMI().hasEHFunclets())
+ return FuncletMembership;
+
+ int EntryBBNumber = MF.front().getNumber();
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn()));
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<const MachineBasicBlock *, 16> FuncletBlocks;
+ SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks;
+ SmallVector<const MachineBasicBlock *, 16> SEHCatchPads;
+ SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors;
+ for (const MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry()) {
+ FuncletBlocks.push_back(&MBB);
+ } else if (IsSEH && MBB.isEHPad()) {
+ SEHCatchPads.push_back(&MBB);
+ } else if (MBB.pred_empty()) {
+ UnreachableBlocks.push_back(&MBB);
+ }
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+ // CatchPads are not funclets for SEH so do not consider CatchRet to
+ // transfer control to another funclet.
+ if (MBBI->getOpcode() != TII->getCatchReturnOpcode())
+ continue;
+
+ // FIXME: SEH CatchPads are not necessarily in the parent function:
+ // they could be inside a finally block.
+ const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB();
+ const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB();
+ CatchRetSuccessors.push_back(
+ {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()});
+ }
+
+ // We don't have anything to do if there aren't any EH funclets.
+ if (FuncletBlocks.empty())
+ return FuncletMembership;
+
+ // Identify all the basic blocks reachable from the function entry.
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front());
+ // All blocks not part of a funclet are in the parent function.
+ for (const MachineBasicBlock *MBB : UnreachableBlocks)
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ // Next, identify all the blocks inside the funclets.
+ for (const MachineBasicBlock *MBB : FuncletBlocks)
+ collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB);
+ // SEH CatchPads aren't really funclets, handle them separately.
+ for (const MachineBasicBlock *MBB : SEHCatchPads)
+ collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB);
+ // Finally, identify all the targets of a catchret.
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair :
+ CatchRetSuccessors)
+ collectFuncletMembers(FuncletMembership, CatchRetPair.second,
+ CatchRetPair.first);
+ return FuncletMembership;
+}
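+
+// A minimal usage sketch, assuming MF is a MachineFunction that has been
+// through funclet-aware EH preparation:
+//
+//   DenseMap<const MachineBasicBlock *, int> Colors = getFuncletMembership(MF);
+//   for (const MachineBasicBlock &MBB : MF)
+//     if (Colors.count(&MBB))
+//       dbgs() << "BB#" << MBB.getNumber() << " -> funclet "
+//              << Colors.lookup(&MBB) << '\n';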
diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 000000000000..04f7f419f5ea
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,67 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// This class works in conjunction with the post-RA scheduler to rename
+/// registers to break register anti-dependencies (WAR hazards).
+class LLVM_LIBRARY_VISIBILITY AntiDepBreaker {
+public:
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+
+ virtual ~AntiDepBreaker();
+
+ /// Initialize anti-dep breaking for a new basic block.
+ virtual void StartBlock(MachineBasicBlock *BB) = 0;
+
+ /// Identify anti-dependencies within a basic-block region and break them by
+ /// renaming registers. Return the number of anti-dependencies broken.
+ virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) = 0;
+
+ /// Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ virtual void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) = 0;
+
+ /// Finish anti-dep breaking for a basic block.
+ virtual void FinishBlock() = 0;
+
+ /// Update DBG_VALUE if the dependency breaker is updating
+ /// another machine instruction to use NewReg.
+ void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) {
+ assert(MI.isDebugValue() && "MI is not DBG_VALUE!");
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg)
+ MI.getOperand(0).setReg(NewReg);
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 000000000000..5294c98e314d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,139 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
+
+ARMException::~ARMException() {}
+
+ARMTargetStreamer &ARMException::getTargetStreamer() {
+ MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+}
+
+/// endModule - Emit all exception information that should come after the
+/// content.
+void ARMException::endModule() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->EmitCFISections(false, true);
+}
+
+void ARMException::beginFunction(const MachineFunction *MF) {
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ getTargetStreamer().emitFnStart();
+ // See if we need call frame info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ assert(MoveType != AsmPrinter::CFI_M_EH &&
+ "non-EH CFI not yet supported in prologue with EHABI lowering");
+ if (MoveType == AsmPrinter::CFI_M_Debug) {
+ shouldEmitCFI = true;
+ Asm->OutStreamer->EmitCFIStartProc(false);
+ }
+}
+
+/// endFunction - Gather and emit post-function exception information.
+///
+void ARMException::endFunction(const MachineFunction *MF) {
+ ARMTargetStreamer &ATS = getTargetStreamer();
+ const Function *F = MF->getFunction();
+ const Function *Per = nullptr;
+ if (F->hasPersonalityFn())
+ Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ bool forceEmitPersonality =
+ F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ F->needsUnwindTableEntry();
+ bool shouldEmitPersonality = forceEmitPersonality ||
+ !MMI->getLandingPads().empty();
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
+ !shouldEmitPersonality)
+ ATS.emitCantUnwind();
+ else if (shouldEmitPersonality) {
+ // Emit references to personality.
+ if (Per) {
+ MCSymbol *PerSym = Asm->getSymbol(Per);
+ Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global);
+ ATS.emitPersonality(PerSym);
+ }
+
+ // Emit .handlerdata directive.
+ ATS.emitHandlerData();
+
+ // Emit actual exception table
+ emitExceptionTable();
+ }
+
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ ATS.emitFnEnd();
+}
+
+void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (const GlobalValue *GV : reverse(TypeInfos)) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]),
+ TTypeEncoding);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
new file mode 100644
index 000000000000..8c6838394ac9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddressPool.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+class MCExpr;
+
+unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
+ HasBeenUsed = true;
+ auto IterBool =
+ Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
+ return IterBool.first->second.Number;
+}
+
+// Emit addresses into the section given.
+void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
+ if (Pool.empty())
+ return;
+
+ // Start the dwarf addr section.
+ Asm.OutStreamer->SwitchSection(AddrSection);
+
+ // Order the address pool entries by ID
+ SmallVector<const MCExpr *, 64> Entries(Pool.size());
+
+ for (const auto &I : Pool)
+ Entries[I.second.Number] =
+ I.second.TLS
+ ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first)
+ : MCSymbolRefExpr::create(I.first, Asm.OutContext);
+
+ for (const MCExpr *Entry : Entries)
+ Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
new file mode 100644
index 000000000000..ba3e3b7c315d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -0,0 +1,52 @@
+//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSymbol.h"
+
+namespace llvm {
+class MCSection;
+class AsmPrinter;
+// Collection of addresses for this unit and assorted labels.
+// A Symbol->unsigned mapping of addresses used by indirect
+// references.
+class AddressPool {
+ struct AddressPoolEntry {
+ unsigned Number;
+ bool TLS;
+ AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
+ };
+ DenseMap<const MCSymbol *, AddressPoolEntry> Pool;
+
+ /// Record whether the AddressPool has been queried for an address index since
+ /// the last "resetUsedFlag" call. Used to implement type unit fallback - a
+ /// type that references addresses cannot be placed in a type unit when using
+ /// fission.
+ bool HasBeenUsed;
+
+public:
+ AddressPool() : HasBeenUsed(false) {}
+
+ /// \brief Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getIndex(const MCSymbol *Sym, bool TLS = false);
+
+ void emit(AsmPrinter &Asm, MCSection *AddrSection);
+
+ bool isEmpty() { return Pool.empty(); }
+
+ bool hasBeenUsed() const { return HasBeenUsed; }
+
+ void resetUsedFlag() { HasBeenUsed = false; }
+};
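+
+// A minimal usage sketch, assuming Asm is the active AsmPrinter and Sym is an
+// MCSymbol whose address will be referenced indirectly (e.g. through
+// DW_FORM_GNU_addr_index):
+//
+//   AddressPool Pool;
+//   unsigned Idx = Pool.getIndex(Sym); // stable, insertion-ordered index
+//   ...
+//   Pool.emit(Asm, Asm.getObjFileLowering().getDwarfAddrSection());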
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 000000000000..272baceeed89
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,2620 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "CodeViewDebug.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "WinException.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+static const char *const DWARFGroupName = "DWARF Emission";
+static const char *const DbgTimerName = "Debug Info Emission";
+static const char *const EHTimerName = "DWARF Exception Writer";
+static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+ if (!P)
+ P = new gcp_map_type();
+ return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
+ unsigned InBits = 0) {
+ unsigned NumBits = 0;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ NumBits = DL.getPreferredAlignmentLog(GVar);
+
+ // If InBits is specified, round up to it.
+ if (InBits > NumBits)
+ NumBits = InBits;
+
+ // If the GV has a specified alignment, take it into account.
+ if (GV->getAlignment() == 0)
+ return NumBits;
+
+ unsigned GVAlign = Log2_32(GV->getAlignment());
+
+ // If the GVAlign is larger than NumBits, or if we are required to obey
+ // NumBits because the GV has an assigned section, obey it.
+ if (GVAlign > NumBits || GV->hasSection())
+ NumBits = GVAlign;
+ return NumBits;
+}
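+
+// Worked example (a sketch; GV and SectionedGV are hypothetical globals with
+// the stated properties): for a global whose preferred alignment is 8
+// (log2 == 3) but which carries an explicit `align 16`, the result is 4,
+// i.e. 16 bytes. If it instead has an explicit `align 1` and a user-specified
+// section, the explicit alignment wins and 0 (1 byte) is returned:
+//
+//   getGVAlignmentLog2(GV, DL);          // == 4: explicit align 16 wins
+//   getGVAlignmentLog2(SectionedGV, DL); // == 0: align 1 + section, obey it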
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
+ : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
+ LastMI(nullptr), LastFn(0), Counter(~0U) {
+ DD = nullptr;
+ MMI = nullptr;
+ LI = nullptr;
+ MF = nullptr;
+ CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
+ CurrentFnBegin = nullptr;
+ CurrentFnEnd = nullptr;
+ GCMetadataPrinters = nullptr;
+ VerboseAsm = OutStreamer->isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+ assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized");
+
+ if (GCMetadataPrinters) {
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+ delete &GCMap;
+ GCMetadataPrinters = nullptr;
+ }
+}
+
+bool AsmPrinter::isPositionIndependent() const {
+ return TM.isPositionIndependent();
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+ return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+ return *TM.getObjFileLowering();
+}
+
+const DataLayout &AsmPrinter::getDataLayout() const {
+ return MMI->getModule()->getDataLayout();
+}
+
+// Do not use the cached DataLayout because some clients use it without a Module
+// (llvm-dsymutil, llvm-dwarfdump).
+unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); }
+
+const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
+ assert(MF && "getSubtargetInfo requires a valid MachineFunction!");
+ return MF->getSubtarget<MCSubtargetInfo>();
+}
+
+void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
+ S.EmitInstruction(Inst, getSubtargetInfo());
+}
+
+StringRef AsmPrinter::getTargetTriple() const {
+ return TM.getTargetTriple().str();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+ return OutStreamer->getCurrentSection().first;
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+ if (isVerbose())
+ AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+
+ // Initialize TargetLoweringObjectFile.
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
+
+ OutStreamer->InitSections(false);
+
+ Mang = new Mangler();
+
+ // Emit the version-min deployment target directive if needed.
+ //
+ // FIXME: If we end up with a collection of these sorts of Darwin-specific
+ // or ELF-specific things, it may make sense to have a platform helper class
+ // that will work with the target helper class. For now keep it here, as the
+ // alternative is duplicated code in each of the target asm printers that
+ // use the directive, where it would need the same conditionalization
+ // anyway.
+ Triple TT(getTargetTriple());
+ // If there is a version specified, Major will be non-zero.
+ if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
+ unsigned Major, Minor, Update;
+ MCVersionMinType VersionType;
+ if (TT.isWatchOS()) {
+ VersionType = MCVM_WatchOSVersionMin;
+ TT.getWatchOSVersion(Major, Minor, Update);
+ } else if (TT.isTvOS()) {
+ VersionType = MCVM_TvOSVersionMin;
+ TT.getiOSVersion(Major, Minor, Update);
+ } else if (TT.isMacOSX()) {
+ VersionType = MCVM_OSXVersionMin;
+ if (!TT.getMacOSXVersion(Major, Minor, Update))
+ Major = 0;
+ } else {
+ VersionType = MCVM_IOSVersionMin;
+ TT.getiOSVersion(Major, Minor, Update);
+ }
+ if (Major != 0)
+ OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
+ }
+
+ // Allow the target to emit any magic that it wants at the start of the file.
+ EmitStartOfAsmFile(M);
+
+ // Very minimal debug info. It is ignored if we emit actual debug info. If we
+ // don't, this at least helps the user find where a global came from.
+ if (MAI->hasSingleParameterDotFile()) {
+ // .file "foo.c"
+ OutStreamer->EmitFileDirective(M.getModuleIdentifier());
+ }
+
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (auto &I : *MI)
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+ MP->beginAssembly(M, *MI, *this);
+
+ // Emit module-level inline asm if it exists.
+ if (!M.getModuleInlineAsm().empty()) {
+ // We're at the module level. Construct MCSubtarget from the default CPU
+ // and target triple.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OutStreamer->AddComment("Start of file scope inline assembly");
+ OutStreamer->AddBlankLine();
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n",
+ OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
+ OutStreamer->AddComment("End of file scope inline assembly");
+ OutStreamer->AddBlankLine();
+ }
+
+ if (MAI->doesSupportDebugInformation()) {
+ bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
+ if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
+ DbgTimerName,
+ CodeViewLineTablesGroupName));
+ }
+ if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
+ DD = new DwarfDebug(this, &M);
+ DD->beginModule();
+ Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
+ }
+ }
+
+ EHStreamer *ES = nullptr;
+ switch (MAI->getExceptionHandlingType()) {
+ case ExceptionHandling::None:
+ break;
+ case ExceptionHandling::SjLj:
+ case ExceptionHandling::DwarfCFI:
+ ES = new DwarfCFIException(this);
+ break;
+ case ExceptionHandling::ARM:
+ ES = new ARMException(this);
+ break;
+ case ExceptionHandling::WinEH:
+ switch (MAI->getWinEHEncodingType()) {
+ default: llvm_unreachable("unsupported unwinding information encoding");
+ case WinEH::EncodingType::Invalid:
+ break;
+ case WinEH::EncodingType::X86:
+ case WinEH::EncodingType::Itanium:
+ ES = new WinException(this);
+ break;
+ }
+ break;
+ }
+ if (ES)
+ Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
+ return false;
+}
+
+static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
+ if (!MAI.hasWeakDefCanBeHiddenDirective())
+ return false;
+
+ return canBeOmittedFromSymbolTable(GV);
+}
+
+void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
+ GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+ switch (Linkage) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ if (MAI->hasWeakDefDirective()) {
+ // .globl _foo
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+
+ if (!canBeHidden(GV, *MAI))
+ // .weak_definition _foo
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+ } else if (MAI->hasLinkOnceDirective()) {
+ // .globl _foo
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+ // NOTE: linkonce is handled by the section the symbol was assigned to.
+ } else {
+ // .weak _foo
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
+ }
+ return;
+ case GlobalValue::ExternalLinkage:
+ // If external, declare as a global symbol: .globl _foo
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
+ return;
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ return;
+ case GlobalValue::AppendingLinkage:
+ case GlobalValue::AvailableExternallyLinkage:
+ case GlobalValue::ExternalWeakLinkage:
+ llvm_unreachable("Should never emit this");
+ }
+ llvm_unreachable("Unknown linkage type!");
+}
+
+void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name,
+ const GlobalValue *GV) const {
+ TM.getNameWithPrefix(Name, GV, *Mang);
+}
+
+MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
+ return TM.getSymbol(GV, *Mang);
+}
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal();
+ assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
+ "No emulated TLS variables in the common section");
+
+ // Never emit TLS variable xyz in emulated TLS model.
+ // The initialization value is in __emutls_t.xyz instead of xyz.
+ if (IsEmuTLSVar)
+ return;
+
+ if (GV->hasInitializer()) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GV))
+ return;
+
+ // Skip the emission of global equivalents. The symbol can be emitted later
+ // on by emitGlobalGOTEquivs in case it turns out to be needed.
+ if (GlobalGOTEquivs.count(getSymbol(GV)))
+ return;
+
+ if (isVerbose()) {
+ // When printing the control variable __emutls_v.*,
+ // we don't need to print the original TLS variable name.
+ GV->printAsOperand(OutStreamer->GetCommentOS(),
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer->GetCommentOS() << '\n';
+ }
+ }
+
+ MCSymbol *GVSym = getSymbol(GV);
+ MCSymbol *EmittedSym = GVSym;
+
+ // getOrCreateEmuTLSControlSym only creates the symbol with name and default
+ // attributes.
+ // GV's or GVSym's attributes will be used for the EmittedSym.
+ EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration());
+
+ if (!GV->hasInitializer()) // External globals require no extra code.
+ return;
+
+ GVSym->redefineIfPossible();
+ if (GVSym->isDefined() || GVSym->isVariable())
+ report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
+
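+  // Classify the global (common, BSS, thread-local, plain data, ...); the
+  // kind determines which directives and which section are used below.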
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+
+ // If the alignment is specified, we *must* obey it. Overaligning a global
+ // with a specified alignment is a prompt way to break globals emitted to
+ // sections and expected to be contiguous (e.g. ObjC metadata).
+ unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->setSymbolSize(GVSym, Size);
+ }
+
+ // Handle common symbols
+ if (GVKind.isCommon()) {
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .comm _foo, 42, 4
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ // Determine to which section this global should be emitted.
+ MCSection *TheSection =
+ getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
+
+ // If we have a bss global going to a section that supports the
+ // zerofill directive, do so here.
+ if (GVKind.isBSS() && MAI->hasMachoZeroFillDirective() &&
+ TheSection->isVirtualSection()) {
+ if (Size == 0)
+ Size = 1; // zerofill of 0 bytes is undefined.
+ unsigned Align = 1 << AlignLog;
+ EmitLinkage(GV, GVSym);
+ // .zerofill __DATA, __bss, _foo, 400, 5
+ OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
+ return;
+ }
+
+  // If this is a BSS local symbol and we are emitting in the BSS
+  // section, use the .lcomm/.comm directive.
+ if (GVKind.isBSSLocal() &&
+ getObjFileLowering().getBSSSection() == TheSection) {
+ if (Size == 0)
+ Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ unsigned Align = 1 << AlignLog;
+
+ // Use .lcomm only if it supports user-specified alignment.
+ // Otherwise, while it would still be correct to use .lcomm in some
+    // cases (e.g. when Align == 1), the external assembler might enforce
+ // some -unknown- default alignment behavior, which could cause
+ // spurious differences between external and integrated assembler.
+ // Prefer to simply fall back to .local / .comm in this case.
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+ // .lcomm _foo, 42
+ OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
+ // .local _foo
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local);
+ // .comm _foo, 42, 4
+ OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ return;
+ }
+
+  // Handle thread local data for mach-o, which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS()) {
+ TheSection = getObjFileLowering().getTLSBSSSection();
+ OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ } else if (GVKind.isThreadData()) {
+ OutStreamer->SwitchSection(TheSection);
+
+ EmitAlignment(AlignLog, GV);
+ OutStreamer->EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getParent()->getDataLayout(),
+ GV->getInitializer());
+ }
+
+ OutStreamer->AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer->SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV, GVSym);
+ OutStreamer->EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = DL.getPointerTypeSize(GV->getType());
+ OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+ PtrSize);
+ OutStreamer->EmitIntValue(0, PtrSize);
+ OutStreamer->EmitSymbolValue(MangSym, PtrSize);
+
+ OutStreamer->AddBlankLine();
+ return;
+ }
+
+ MCSymbol *EmittedInitSym = GVSym;
+
+ OutStreamer->SwitchSection(TheSection);
+
+ EmitLinkage(GV, EmittedInitSym);
+ EmitAlignment(AlignLog, GV);
+
+ OutStreamer->EmitLabel(EmittedInitSym);
+
+ EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer());
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ // .size foo, 42
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym),
+ MCConstantExpr::create(Size, OutContext));
+
+ OutStreamer->AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+ // Print out constants referenced by the function
+ EmitConstantPool();
+
+ // Print the 'header' of function.
+ const Function *F = MF->getFunction();
+
+ OutStreamer->SwitchSection(
+ getObjFileLowering().SectionForGlobal(F, *Mang, TM));
+ EmitVisibility(CurrentFnSym, F->getVisibility());
+
+ EmitLinkage(F, CurrentFnSym);
+ if (MAI->hasFunctionAlignment())
+ EmitAlignment(MF->getAlignment(), F);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+ if (isVerbose()) {
+ F->printAsOperand(OutStreamer->GetCommentOS(),
+ /*PrintType=*/false, F->getParent());
+ OutStreamer->GetCommentOS() << '\n';
+ }
+
+ // Emit the prefix data.
+ if (F->hasPrefixData())
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to
+ // do their wild and crazy things as required.
+ EmitFunctionEntryLabel();
+
+ // If the function had address-taken blocks that got deleted, then we have
+ // references to the dangling symbols. Emit them at the start of the function
+ // so that we don't get references to undefined symbols.
+ std::vector<MCSymbol*> DeadBlockSyms;
+ MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+ for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ OutStreamer->AddComment("Address taken block that was later removed");
+ OutStreamer->EmitLabel(DeadBlockSyms[i]);
+ }
+
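+  // Emit the function-begin label (or an equivalent assignment when the
+  // target requires it) so EH and debug info can reference the start of the
+  // function.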
+ if (CurrentFnBegin) {
+ if (MAI->useAssignmentForEHBegin()) {
+ MCSymbol *CurPos = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(CurPos);
+ OutStreamer->EmitAssignment(CurrentFnBegin,
+ MCSymbolRefExpr::create(CurPos, OutContext));
+ } else {
+ OutStreamer->EmitLabel(CurrentFnBegin);
+ }
+ }
+
+ // Emit pre-function debug and/or EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->beginFunction(MF);
+ }
+
+ // Emit the prologue data.
+ if (F->hasPrologueData())
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData());
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+ CurrentFnSym->redefineIfPossible();
+
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isVariable())
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' is a protected alias");
+ if (CurrentFnSym->isDefined())
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+
+ return OutStreamer->EmitLabel(CurrentFnSym);
+}
+
+/// emitComments - Pretty-print comments for instructions.
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ const MachineMemOperand *MMO;
+ if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Reload\n";
+ }
+ } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
+ CommentOS << MMO->getSize() << "-byte Spill\n";
+ }
+ } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI))
+ CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ }
+
+ // Check for spill-induced copies
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
+}
+
+/// emitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+ unsigned RegNo = MI->getOperand(0).getReg();
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+ OutStreamer->AddComment(OS.str());
+ OutStreamer->AddBlankLine();
+}
+
+static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "kill:";
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ assert(Op.isReg() && "KILL instruction must have only register operands");
+ OS << ' '
+ << PrintReg(Op.getReg(),
+ AP.MF->getSubtarget().getRegisterInfo())
+ << (Op.isDef() ? "<def>" : "<kill>");
+ }
+ AP.OutStreamer->AddComment(OS.str());
+ AP.OutStreamer->AddBlankLine();
+}
+
+/// emitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+ // This code handles only the 4-operand target-independent form.
+ if (MI->getNumOperands() != 4)
+ return false;
+
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "DEBUG_VALUE: ";
+
+ const DILocalVariable *V = MI->getDebugVariable();
+ if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+ StringRef Name = SP->getDisplayName();
+ if (!Name.empty())
+ OS << Name << ":";
+ }
+ OS << V->getName();
+
+ const DIExpression *Expr = MI->getDebugExpression();
+ if (Expr->isBitPiece())
+ OS << " [bit_piece offset=" << Expr->getBitPieceOffset()
+ << " size=" << Expr->getBitPieceSize() << "]";
+ OS << " <- ";
+
+ // The second operand is only an offset if it's an immediate.
+ bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
+ int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
+
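+  // Walk the DIExpression, folding simple DW_OP_plus/DW_OP_minus elements
+  // into Offset; anything more involved is just summarized as a complex
+  // expression in the comment.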
+ for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
+ if (Deref) {
+ // We currently don't support extra Offsets or derefs after the first
+ // one. Bail out early instead of emitting an incorrect comment
+ OS << " [complex expression]";
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+ }
+ uint64_t Op = Expr->getElement(i);
+ if (Op == dwarf::DW_OP_deref) {
+ Deref = true;
+ continue;
+ } else if (Op == dwarf::DW_OP_bit_piece) {
+ // There can't be any operands after this in a valid expression
+ break;
+ }
+ uint64_t ExtraOffset = Expr->getElement(i++);
+ if (Op == dwarf::DW_OP_plus)
+ Offset += ExtraOffset;
+ else {
+ assert(Op == dwarf::DW_OP_minus);
+ Offset -= ExtraOffset;
+ }
+ }
+
+ // Register or immediate value. Register 0 means undef.
+ if (MI->getOperand(0).isFPImm()) {
+ APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+ if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+ OS << (double)APF.convertToFloat();
+ } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+ OS << APF.convertToDouble();
+ } else {
+ // There is no good way to print long double. Convert a copy to
+ // double. Ah well, it's only a comment.
+ bool ignored;
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &ignored);
+ OS << "(long double) " << APF.convertToDouble();
+ }
+ } else if (MI->getOperand(0).isImm()) {
+ OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else {
+ unsigned Reg;
+ if (MI->getOperand(0).isReg()) {
+ Reg = MI->getOperand(0).getReg();
+ } else {
+ assert(MI->getOperand(0).isFI() && "Unknown operand type");
+ const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
+ Offset += TFI->getFrameIndexReference(*AP.MF,
+ MI->getOperand(0).getIndex(), Reg);
+ Deref = true;
+ }
+ if (Reg == 0) {
+ // Suppress offset, it is not meaningful here.
+ OS << "undef";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+ }
+ if (Deref)
+ OS << '[';
+ OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
+ }
+
+ if (Deref)
+ OS << '+' << Offset << ']';
+
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
+ return true;
+}
+
+AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI &&
+ MF->getFunction()->needsUnwindTableEntry())
+ return CFI_M_EH;
+
+ if (MMI->hasDebugInfo())
+ return CFI_M_Debug;
+
+ return CFI_M_None;
+}
+
+bool AsmPrinter::needsSEHMoves() {
+ return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry();
+}
+
+void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
+ if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+ ExceptionHandlingType != ExceptionHandling::ARM)
+ return;
+
+ if (needsCFIMoves() == CFI_M_None)
+ return;
+
+ const MachineModuleInfo &MMI = MF->getMMI();
+ const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
+ unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+ const MCCFIInstruction &CFI = Instrs[CFIIndex];
+ emitCFIInstruction(CFI);
+}
+
+void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
+ // The operands are the MCSymbol and the frame offset of the allocation.
+ MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol();
+ int FrameOffset = MI.getOperand(1).getImm();
+
+ // Emit a symbol assignment.
+ OutStreamer->EmitAssignment(FrameAllocSym,
+ MCConstantExpr::create(FrameOffset, OutContext));
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+ EmitFunctionHeader();
+
+ // Emit target-specific gunk before the function body.
+ EmitFunctionBodyStart();
+
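+  // Per-instruction debug handler callbacks are only needed when the module
+  // actually has debug info.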
+ bool ShouldPrintDebugScopes = MMI->hasDebugInfo();
+
+ // Print out code for the function.
+ bool HasAnyRealCode = false;
+ for (auto &MBB : *MF) {
+ // Print a label for the basic block.
+ EmitBasicBlockStart(MBB);
+ for (auto &MI : MBB) {
+
+ // Print the assembly for the instruction.
+ if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
+ !MI.isDebugValue()) {
+ HasAnyRealCode = true;
+ ++EmittedInsts;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ TimePassesIsEnabled);
+ HI.Handler->beginInstruction(&MI);
+ }
+ }
+
+ if (isVerbose())
+ emitComments(MI, OutStreamer->GetCommentOS());
+
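+      // Pseudo-instructions with no machine encoding are expanded here;
+      // everything else is handed to the target's EmitInstruction.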
+ switch (MI.getOpcode()) {
+ case TargetOpcode::CFI_INSTRUCTION:
+ emitCFIInstruction(MI);
+ break;
+
+ case TargetOpcode::LOCAL_ESCAPE:
+ emitFrameAlloc(MI);
+ break;
+
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::INLINEASM:
+ EmitInlineAsm(&MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ if (isVerbose()) {
+ if (!emitDebugValueComment(&MI, *this))
+ EmitInstruction(&MI);
+ }
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ if (isVerbose()) emitImplicitDef(&MI);
+ break;
+ case TargetOpcode::KILL:
+ if (isVerbose()) emitKill(&MI, *this);
+ break;
+ default:
+ EmitInstruction(&MI);
+ break;
+ }
+
+ if (ShouldPrintDebugScopes) {
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ TimePassesIsEnabled);
+ HI.Handler->endInstruction();
+ }
+ }
+ }
+
+ EmitBasicBlockEnd(MBB);
+ }
+
+ // If the function is empty and the object file uses .subsections_via_symbols,
+ // then we need to emit *something* to the function body to prevent the
+ // labels from collapsing together. Just emit a noop.
+  if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
+ MCInst Noop;
+ MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop);
+ OutStreamer->AddComment("avoids zero-length function");
+
+    // Targets can opt out of emitting the noop here by leaving the opcode
+ // unspecified.
+ if (Noop.getOpcode())
+ OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+ }
+
+ const Function *F = MF->getFunction();
+ for (const auto &BB : *F) {
+ if (!BB.hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(&BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer->AddComment("Address of block that was removed by CodeGen");
+ OutStreamer->EmitLabel(Sym);
+ }
+
+ // Emit target-specific gunk after the function body.
+ EmitFunctionBodyEnd();
+
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function.
+ CurrentFnEnd = createTempSymbol("func_end");
+ OutStreamer->EmitLabel(CurrentFnEnd);
+ }
+
+ // If the target wants a .size directive for the size of the function, emit
+ // it.
+ if (MAI->hasDotTypeDotSizeDirective()) {
+    // We can get the size as the difference between the function label and
+    // the temp label.
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
+ MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext);
+ if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym))
+ OutStreamer->emitELFSize(Sym, SizeExp);
+ }
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->markFunctionEnd();
+ }
+
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo();
+
+ // Emit post-function debug and/or EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->endFunction(MF);
+ }
+ MMI->EndFunction();
+
+ OutStreamer->AddBlankLine();
+}
+
+/// \brief Compute the number of global variables that use a Constant.
+static unsigned getNumGlobalVariableUses(const Constant *C) {
+ if (!C)
+ return 0;
+
+ if (isa<GlobalVariable>(C))
+ return 1;
+
+ unsigned NumUses = 0;
+ for (auto *CU : C->users())
+ NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU));
+
+ return NumUses;
+}
+
+/// \brief Only consider global GOT equivalents if at least one user is a
+/// constant expression inside an initializer of another global variable.
+/// Constant expressions inside instructions are not handled. During global
+/// variable emission, candidates are skipped and are emitted later in case
+/// at least one constant expression isn't replaced by a PC-relative GOT
+/// entry access.
+static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
+ unsigned &NumGOTEquivUsers) {
+ // Global GOT equivalents are unnamed private globals with a constant
+ // pointer initializer to another global symbol. They must point to a
+ // GlobalVariable or Function, i.e., as GlobalValue.
+ if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() ||
+ !GV->isConstant() || !GV->isDiscardableIfUnused() ||
+ !dyn_cast<GlobalValue>(GV->getOperand(0)))
+ return false;
+
+  // To be a GOT equivalent, at least one of its users needs to be a constant
+  // expression used by another global variable.
+ for (auto *U : GV->users())
+ NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
+
+ return NumGOTEquivUsers > 0;
+}
+
+/// \brief An unnamed constant global variable that solely contains a pointer
+/// to another global variable is equivalent to a GOT table entry; it holds
+/// the address of another symbol. Optimize it and replace accesses to these
+/// "GOT equivalents" by using the GOT entry for the final global instead.
+/// Compute GOT equivalent candidates among all global variables so that, if
+/// possible, their emission can be avoided later on once each use is replaced
+/// by a GOT entry access.
+void AsmPrinter::computeGlobalGOTEquivs(Module &M) {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ for (const auto &G : M.globals()) {
+ unsigned NumGOTEquivUsers = 0;
+ if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers))
+ continue;
+
+ const MCSymbol *GOTEquivSym = getSymbol(&G);
+ GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers);
+ }
+}
+
+/// \brief Constant expressions using GOT equivalent globals may not be
+/// eligible for PC-relative GOT entry conversion; in such cases we need to
+/// emit the globals we previously omitted in EmitGlobalVariable.
+void AsmPrinter::emitGlobalGOTEquivs() {
+ if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ return;
+
+ SmallVector<const GlobalVariable *, 8> FailedCandidates;
+ for (auto &I : GlobalGOTEquivs) {
+ const GlobalVariable *GV = I.second.first;
+ unsigned Cnt = I.second.second;
+ if (Cnt)
+ FailedCandidates.push_back(GV);
+ }
+ GlobalGOTEquivs.clear();
+
+ for (auto *GV : FailedCandidates)
+ EmitGlobalVariable(GV);
+}
+
+void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
+ const GlobalIndirectSymbol& GIS) {
+ MCSymbol *Name = getSymbol(&GIS);
+
+ if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_Global);
+ else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+
+ // Set the symbol type to function if the alias has a function type.
+ // This affects codegen when the aliasee is not a function.
+ if (GIS.getType()->getPointerElementType()->isFunctionTy()) {
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
+ if (isa<GlobalIFunc>(GIS))
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ }
+
+ EmitVisibility(Name, GIS.getVisibility());
+
+ const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
+
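+  // On Mach-O, an alias defined by a non-trivial expression (e.g. symbol plus
+  // offset) is marked as an alternative entry point.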
+ if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+ OutStreamer->EmitSymbolAttribute(Name, MCSA_AltEntry);
+
+ // Emit the directives as assignments aka .set:
+ OutStreamer->EmitAssignment(Name, Expr);
+
+ if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = GA->getBaseObject();
+ if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
+ OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
+ MCConstantExpr::create(Size, OutContext));
+ }
+ }
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+ // Set the MachineFunction to nullptr so that we can catch attempted
+ // accesses to MF specific features at the module level and so that
+ // we can conditionalize accesses based on whether or not it is nullptr.
+ MF = nullptr;
+
+  // Gather all GOT equivalent globals in the module. We really need two
+  // passes over the globals: one to compute the candidates and another to
+  // avoid their emission in EmitGlobalVariable; otherwise we would not be
+  // able to handle cases where the GOT equivalent shows up before its use.
+ computeGlobalGOTEquivs(M);
+
+ // Emit global variables.
+ for (const auto &G : M.globals())
+ EmitGlobalVariable(&G);
+
+ // Emit remaining GOT equivalent globals.
+ emitGlobalGOTEquivs();
+
+ // Emit visibility info for declarations
+ for (const Function &F : M) {
+ if (!F.isDeclarationForLinker())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = getSymbol(&F);
+ EmitVisibility(Name, V, false);
+ }
+
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ // Emit module flags.
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ if (!ModuleFlags.empty())
+ TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM);
+
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer->SwitchSection(TLOF.getDataSection());
+ const DataLayout &DL = M.getDataLayout();
+
+ for (const auto &Stub : Stubs) {
+ OutStreamer->EmitLabel(Stub.first);
+ OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
+ DL.getPointerSize());
+ }
+ }
+ }
+
+ // Finalize debug and EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ TimePassesIsEnabled);
+ HI.Handler->endModule();
+ delete HI.Handler;
+ }
+ Handlers.clear();
+ DD = nullptr;
+
+ // If the target wants to know about weak references, print them all.
+ if (MAI->getWeakRefDirective()) {
+ // FIXME: This is not lazy, it would be nice to only print weak references
+ // to stuff that is actually used. Note that doing so would require targets
+ // to notice uses in operands (due to constant exprs etc). This should
+ // happen with the MC stuff eventually.
+
+ // Print out module-level global objects here.
+ for (const auto &GO : M.global_objects()) {
+ if (!GO.hasExternalWeakLinkage())
+ continue;
+ OutStreamer->EmitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference);
+ }
+ }
+
+ OutStreamer->AddBlankLine();
+
+ // Print aliases in topological order, that is, for each alias a = b,
+ // b must be printed before a.
+  // This is because on some targets (e.g. PowerPC) the linker expects aliases
+  // in such an order to generate correct TOC information.
+ SmallVector<const GlobalAlias *, 16> AliasStack;
+ SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
+ for (const auto &Alias : M.aliases()) {
+ for (const GlobalAlias *Cur = &Alias; Cur;
+ Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
+ if (!AliasVisited.insert(Cur).second)
+ break;
+ AliasStack.push_back(Cur);
+ }
+ for (const GlobalAlias *AncestorAlias : reverse(AliasStack))
+ emitGlobalIndirectSymbol(M, *AncestorAlias);
+ AliasStack.clear();
+ }
+ for (const auto &IFunc : M.ifuncs())
+ emitGlobalIndirectSymbol(M, IFunc);
+
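+  // Let each garbage collection strategy's metadata printer emit its
+  // module-level tables.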
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I))
+ MP->finishAssembly(M, *MI, *this);
+
+ // Emit llvm.ident metadata in an '.ident' directive.
+ EmitModuleIdents(M);
+
+ // Emit __morestack address if needed for indirect calls.
+ if (MMI->usesMorestackAddr()) {
+ unsigned Align = 1;
+ MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), SectionKind::getReadOnly(),
+ /*C=*/nullptr, Align);
+ OutStreamer->SwitchSection(ReadOnlySection);
+
+ MCSymbol *AddrSymbol =
+ OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
+ OutStreamer->EmitLabel(AddrSymbol);
+
+ unsigned PtrSize = M.getDataLayout().getPointerSize(0);
+ OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
+ PtrSize);
+ }
+
+ // If we don't have any trampolines, then we don't require stack memory
+ // to be executable. Some targets have a directive to declare this.
+ Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+ if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+ if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+ OutStreamer->SwitchSection(S);
+
+ // Allow the target to emit any magic that it wants at the end of the file,
+ // after everything else has gone out.
+ EmitEndOfAsmFile(M);
+
+ delete Mang; Mang = nullptr;
+ MMI = nullptr;
+
+ OutStreamer->Finish();
+ OutStreamer->reset();
+
+ return false;
+}
+
+MCSymbol *AsmPrinter::getCurExceptionSym() {
+ if (!CurExceptionSym)
+ CurExceptionSym = createTempSymbol("exception");
+ return CurExceptionSym;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+ this->MF = &MF;
+ // Get the function symbol.
+ CurrentFnSym = getSymbol(MF.getFunction());
+ CurrentFnSymForSize = CurrentFnSym;
+ CurrentFnBegin = nullptr;
+ CurExceptionSym = nullptr;
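+  // A function-begin label is only needed when EH tables or debug info will
+  // reference it, or when the target wants a local symbol for computing the
+  // function's size.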
+ bool NeedsLocalForSize = MAI->needsLocalForSize();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MMI->hasEHFunclets() || NeedsLocalForSize) {
+ CurrentFnBegin = createTempSymbol("func_begin");
+ if (NeedsLocalForSize)
+ CurrentFnSymForSize = CurrentFnBegin;
+ }
+
+ if (isVerbose())
+ LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+// Keep track of the alignment and constant pool entries per section.
+struct SectionCPs {
+  MCSection *S;
+  unsigned Alignment;
+  SmallVector<unsigned, 4> CPEs;
+  SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {}
+};
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+ if (CP.empty()) return;
+
+  // Calculate sections for constant pool entries. We collect entries that go
+  // into the same section together to reduce the amount of section switching.
+ SmallVector<SectionCPs, 4> CPSections;
+ for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+ const MachineConstantPoolEntry &CPE = CP[i];
+ unsigned Align = CPE.getAlignment();
+
+ SectionKind Kind = CPE.getSectionKind(&getDataLayout());
+
+ const Constant *C = nullptr;
+ if (!CPE.isMachineConstantPoolEntry())
+ C = CPE.Val.ConstVal;
+
+ MCSection *S = getObjFileLowering().getSectionForConstant(getDataLayout(),
+ Kind, C, Align);
+
+    // The number of sections is small, so just do a linear search from the
+    // last section to the first.
+ bool Found = false;
+ unsigned SecIdx = CPSections.size();
+ while (SecIdx != 0) {
+ if (CPSections[--SecIdx].S == S) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ SecIdx = CPSections.size();
+ CPSections.push_back(SectionCPs(S, Align));
+ }
+
+ if (Align > CPSections[SecIdx].Alignment)
+ CPSections[SecIdx].Alignment = Align;
+ CPSections[SecIdx].CPEs.push_back(i);
+ }
+
+ // Now print stuff into the calculated sections.
+ const MCSection *CurSection = nullptr;
+ unsigned Offset = 0;
+ for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+ for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+ unsigned CPI = CPSections[i].CPEs[j];
+ MCSymbol *Sym = GetCPISymbol(CPI);
+ if (!Sym->isUndefined())
+ continue;
+
+ if (CurSection != CPSections[i].S) {
+ OutStreamer->SwitchSection(CPSections[i].S);
+ EmitAlignment(Log2_32(CPSections[i].Alignment));
+ CurSection = CPSections[i].S;
+ Offset = 0;
+ }
+
+ MachineConstantPoolEntry CPE = CP[CPI];
+
+ // Emit inter-object padding for alignment.
+ unsigned AlignMask = CPE.getAlignment() - 1;
+ unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+ OutStreamer->EmitZeros(NewOffset - Offset);
+
+ Type *Ty = CPE.getType();
+ Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
+
+ OutStreamer->EmitLabel(Sym);
+ if (CPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal);
+ }
+ }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
+///
+void AsmPrinter::EmitJumpTableInfo() {
+ const DataLayout &DL = MF->getDataLayout();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return;
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ // Pick the directive to use to print the jump table entries, and switch to
+ // the appropriate section.
+ const Function *F = MF->getFunction();
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
+ MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
+ *F);
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
+ MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM);
+ OutStreamer->SwitchSection(ReadOnlySection);
+ }
+
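+  // Align the jump table to the natural alignment of its entries.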
+ EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL)));
+
+ // Jump tables in code sections are marked with a data_region directive
+ // where that's supported.
+ if (!JTInDiffSection)
+ OutStreamer->EmitDataRegion(MCDR_DataRegionJT32);
+
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+    // For the EK_LabelDifference32 entry, if using .set avoids a relocation,
+    // emit a .set directive for each unique entry.
+ if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+ MAI->doesSetDirectiveSuppressReloc()) {
+ SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+ const MachineBasicBlock *MBB = JTBBs[ii];
+ if (!EmittedSets.insert(MBB).second)
+ continue;
+
+ // .set LJTSet, LBB32-base
+ const MCExpr *LHS =
+ MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+ MCBinaryExpr::createSub(LHS, Base,
+ OutContext));
+ }
+ }
+
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
+ // before each jump table. The first label is never referenced, but tells
+ // the assembler and linker the extents of the jump table object. The
+ // second label is actually referenced by the code.
+ if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix())
+ // FIXME: This doesn't have to have any specific name, just any randomly
+ // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
+
+ OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+ for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+ EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ }
+ if (!JTInDiffSection)
+ OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+ const MCExpr *Value = nullptr;
+ switch (MJTI->getEntryKind()) {
+ case MachineJumpTableInfo::EK_Inline:
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
+ case MachineJumpTableInfo::EK_Custom32:
+ Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry(
+ MJTI, MBB, UID, OutContext);
+ break;
+ case MachineJumpTableInfo::EK_BlockAddress:
+    // EK_BlockAddress - Each entry is the plain address of a block, e.g.:
+ // .word LBB123
+ Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ break;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+    // EK_GPRel32BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gprel32 LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+    // EK_GPRel64BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext));
+ return;
+ }
+
+ case MachineJumpTableInfo::EK_LabelDifference32: {
+ // Each entry is the address of the block minus the address of the jump
+ // table. This is used for PIC jump tables where gprel32 is not supported.
+ // e.g.:
+ // .word LBB123 - LJTI1_2
+ // If the .set directive avoids relocations, this is emitted as:
+ // .set L4_5_set_123, LBB123 - LJTI1_2
+ // .word L4_5_set_123
+ if (MAI->doesSetDirectiveSuppressReloc()) {
+ Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()),
+ OutContext);
+ break;
+ }
+ Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ break;
+ }
+ }
+
+ assert(Value && "Unknown entry kind!");
+
+ unsigned EntrySize = MJTI->getEntrySize(getDataLayout());
+ OutStreamer->EmitValue(Value, EntrySize);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true; otherwise,
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+ if (GV->getName() == "llvm.used") {
+ if (MAI->hasNoDeadStrip()) // No need to emit this at all.
+ EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
+ return true;
+ }
+
+ // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+ if (GV->getSection() == "llvm.metadata" ||
+ GV->hasAvailableExternallyLinkage())
+ return true;
+
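+  // Everything below handles the appending-linkage structor lists
+  // (llvm.global_ctors and llvm.global_dtors).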
+ if (!GV->hasAppendingLinkage()) return false;
+
+ assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+ if (GV->getName() == "llvm.global_ctors") {
+ EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ true);
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".constructors_used");
+ OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ if (GV->getName() == "llvm.global_dtors") {
+ EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
+ /* isCtor */ false);
+
+ if (TM.getRelocationModel() == Reloc::Static &&
+ MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+ StringRef Sym(".destructors_used");
+ OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
+ MCSA_Reference);
+ }
+ return true;
+ }
+
+ report_fatal_error("unknown special variable");
+}
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
+ // Should be an array of 'i8*'.
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+ const GlobalValue *GV =
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ if (GV)
+ OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
+ }
+}
+
+namespace {
+struct Structor {
+ Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {}
+ int Priority;
+ llvm::Constant *Func;
+ llvm::GlobalValue *ComdatKey;
+};
+} // end namespace
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool isCtor) {
+ // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // init priority.
+ if (!isa<ConstantArray>(List)) return;
+
+ // Sanity check the structors list.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ if (!InitList) return; // Not an array!
+ StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+ // FIXME: Only allow the 3-field form in LLVM 4.0.
+ if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3)
+ return; // Not an array of two or three elements!
+ if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+ if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U)))
+ return; // Not (int, ptr, ptr).
+
+ // Gather the structors in a form that's convenient for sorting by priority.
+ SmallVector<Structor, 8> Structors;
+ for (Value *O : InitList->operands()) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
+ if (!CS) continue; // Malformed.
+ if (CS->getOperand(1)->isNullValue())
+ break; // Found a null terminator, skip the rest.
+ ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+ if (!Priority) continue; // Malformed.
+ Structors.push_back(Structor());
+ Structor &S = Structors.back();
+ S.Priority = Priority->getLimitedValue(65535);
+ S.Func = CS->getOperand(1);
+ if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue())
+ S.ComdatKey =
+ dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
+ }
+
+ // Emit the function pointers in the target-specific order
+ unsigned Align = Log2_32(DL.getPointerPrefAlignment());
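+  // Sort by priority; a stable sort preserves the original IR order among
+  // structors with equal priority.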
+ std::stable_sort(Structors.begin(), Structors.end(),
+ [](const Structor &L,
+ const Structor &R) { return L.Priority < R.Priority; });
+ for (Structor &S : Structors) {
+ const TargetLoweringObjectFile &Obj = getObjFileLowering();
+ const MCSymbol *KeySym = nullptr;
+ if (GlobalValue *GV = S.ComdatKey) {
+ if (GV->hasAvailableExternallyLinkage())
+ // If the associated variable is available_externally, some other TU
+ // will provide its dynamic initializer.
+ continue;
+
+ KeySym = getSymbol(GV);
+ }
+ MCSection *OutputSection =
+ (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+ : Obj.getStaticDtorSection(S.Priority, KeySym));
+ OutStreamer->SwitchSection(OutputSection);
+ if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
+ EmitAlignment(Align);
+ EmitXXStructor(DL, S.Func);
+ }
+}
+
+void AsmPrinter::EmitModuleIdents(Module &M) {
+ if (!MAI->hasIdentDirective())
+ return;
+
+ if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.ident metadata entry can have only one operand");
+ const MDString *S = cast<MDString>(N->getOperand(0));
+ OutStreamer->EmitIdent(S->getString());
+ }
+ }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+ OutStreamer->EmitIntValue(Value, 1);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+ OutStreamer->EmitIntValue(Value, 2);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+ OutStreamer->EmitIntValue(Value, 4);
+}
+
+/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
+/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
+/// .set if it avoids relocations.
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ unsigned Size) const {
+ OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size,
+ bool IsSectionRelative) const {
+ if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
+ OutStreamer->EmitCOFFSecRel32(Label);
+ return;
+ }
+
+ // Emit Label+Offset (or just Label if Offset is zero)
+ const MCExpr *Expr = MCSymbolRefExpr::create(Label, OutContext);
+ if (Offset)
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(Offset, OutContext), OutContext);
+
+ OutStreamer->EmitValue(Expr, Size);
+}
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary. For example, if you pass in 3 here, you will get an 8
+// byte alignment. If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
+ if (GV)
+ NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
+
+ if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+
+ assert(NumBits <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ if (getCurrentSection()->getKind().isText())
+ OutStreamer->EmitCodeAlignment(1u << NumBits);
+ else
+ OutStreamer->EmitValueToAlignment(1u << NumBits);
+}
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
+ MCContext &Ctx = OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ return MCSymbolRefExpr::create(getSymbol(GV), Ctx);
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+ return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (!CE) {
+ llvm_unreachable("Unknown constant value to lower!");
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
+ if (C != CE)
+ return lowerConstant(C);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ case Instruction::GetElementPtr: {
+ // Generate a symbolic expression for the byte address
+ APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI);
+
+ const MCExpr *Base = lowerConstant(CE->getOperand(0));
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return lowerConstant(CE->getOperand(0));
+
+ case Instruction::IntToPtr: {
+ const DataLayout &DL = getDataLayout();
+
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+ false/*ZExt*/);
+ return lowerConstant(Op);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &DL = getDataLayout();
+
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = lowerConstant(Op);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
+ }
+
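+  // A difference of two global addresses can sometimes be lowered to a
+  // target-specific relative reference; otherwise fall back to a plain
+  // subtraction of the two symbols (plus any constant addend).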
+ case Instruction::Sub: {
+ GlobalValue *LHSGV;
+ APInt LHSOffset;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset,
+ getDataLayout())) {
+ GlobalValue *RHSGV;
+ APInt RHSOffset;
+ if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
+ getDataLayout())) {
+ const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference(
+ LHSGV, RHSGV, *Mang, TM);
+ if (!RelocExpr)
+ RelocExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx),
+ MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ int64_t Addend = (LHSOffset - RHSOffset).getSExtValue();
+ if (Addend != 0)
+ RelocExpr = MCBinaryExpr::createAdd(
+ RelocExpr, MCConstantExpr::create(Addend, Ctx), Ctx);
+ return RelocExpr;
+ }
+ }
+ }
+ // else fallthrough
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::Shl:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1));
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
+ case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+ case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx);
+ case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx);
+ case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx);
+ case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx);
+ case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx);
+ case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx);
+ case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C,
+ AsmPrinter &AP,
+ const Constant *BaseCV = nullptr,
+ uint64_t Offset = 0);
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP);
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ uint64_t Size = DL.getTypeAllocSizeInBits(V->getType());
+ assert(Size % 8 == 0);
+
+ // Extend the element to take zero padding into account.
+ APInt Value = CI->getValue().zextOrSelf(Size);
+ if (!Value.isSplat(8))
+ return -1;
+
+ return Value.zextOrTrunc(8).getZExtValue();
+ }
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+ // Make sure all array elements are sequences of the same repeated
+ // byte.
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
+ Constant *Op0 = CA->getOperand(0);
+ int Byte = isRepeatedByteSequence(Op0, DL);
+ if (Byte == -1)
+ return -1;
+
+ // All array elements must be equal.
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i)
+ if (CA->getOperand(i) != Op0)
+ return -1;
+ return Byte;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
+
+ return -1;
+}
+
+static void emitGlobalConstantDataSequential(const DataLayout &DL,
+ const ConstantDataSequential *CDS,
+ AsmPrinter &AP) {
+
+ // See if we can aggregate this into a .fill, if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, DL);
+ if (Value != -1) {
+ uint64_t Bytes = DL.getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer->emitFill(Bytes, Value);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer->EmitBytes(CDS->getAsString());
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ if (AP.isVerbose())
+ AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize);
+ }
+ } else {
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP);
+ }
+
+ unsigned Size = DL.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
+ CDS->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer->EmitZeros(Padding);
+
+}
+
+static void emitGlobalConstantArray(const DataLayout &DL,
+ const ConstantArray *CA, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, DL);
+
+ if (Value != -1) {
+ uint64_t Bytes = DL.getTypeAllocSize(CA->getType());
+ AP.OutStreamer->emitFill(Bytes, Value);
+  } else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+ emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset);
+ Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType());
+ }
+ }
+}
+
+static void emitGlobalConstantVector(const DataLayout &DL,
+ const ConstantVector *CV, AsmPrinter &AP) {
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ emitGlobalConstantImpl(DL, CV->getOperand(i), AP);
+
+ unsigned Size = DL.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer->EmitZeros(Padding);
+}
+
+static void emitGlobalConstantStruct(const DataLayout &DL,
+ const ConstantStruct *CS, AsmPrinter &AP,
+ const Constant *BaseCV, uint64_t Offset) {
+ // Print the fields in successive locations. Pad to align if needed!
+ unsigned Size = DL.getTypeAllocSize(CS->getType());
+ const StructLayout *Layout = DL.getStructLayout(CS->getType());
+ uint64_t SizeSoFar = 0;
+ for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+ const Constant *Field = CS->getOperand(i);
+
+ // Print the actual field value.
+ emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar);
+
+ // Check if padding is needed and insert one or more 0s.
+ uint64_t FieldSize = DL.getTypeAllocSize(Field->getType());
+ uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+ - Layout->getElementOffset(i)) - FieldSize;
+ SizeSoFar += FieldSize + PadSize;
+
+ // Insert padding - this may include padding to increase the size of the
+ // current field up to the ABI size (if the struct is not packed) as well
+ // as padding to ensure that the next field starts at the right offset.
+ AP.OutStreamer->EmitZeros(PadSize);
+ }
+ assert(SizeSoFar == Layout->getSizeInBytes() &&
+ "Layout of constant struct may be incorrect!");
+}
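+// Example: with a typical data layout, a { i8, i32 } struct is emitted as one
+// byte for the i8 field, three zero bytes of padding, then four bytes for the
+// i32, so SizeSoFar ends up matching the 8-byte struct layout size.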
+
+static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
+ APInt API = CFP->getValueAPF().bitcastToAPInt();
+
+ // First print a comment with what we think the original floating-point value
+ // should have been.
+ if (AP.isVerbose()) {
+ SmallString<8> StrVal;
+ CFP->getValueAPF().toString(StrVal);
+
+ if (CFP->getType())
+ CFP->getType()->print(AP.OutStreamer->GetCommentOS());
+ else
+ AP.OutStreamer->GetCommentOS() << "Printing <null> Type";
+ AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
+ }
+
+ // Now iterate through the APInt chunks, emitting them in endian-correct
+ // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit
+ // floats).
+ unsigned NumBytes = API.getBitWidth() / 8;
+ unsigned TrailingBytes = NumBytes % sizeof(uint64_t);
+ const uint64_t *p = API.getRawData();
+
+ // PPC's long double has odd notions of endianness compared to how LLVM
+ // handles it: p[0] goes first for *big* endian on PPC.
+ if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) {
+ int Chunk = API.getNumWords() - 1;
+
+ if (TrailingBytes)
+ AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes);
+
+ for (; Chunk >= 0; --Chunk)
+ AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t));
+ } else {
+ unsigned Chunk;
+ for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk)
+ AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t));
+
+ if (TrailingBytes)
+ AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes);
+ }
+
+ // Emit the tail padding for the long double.
+ const DataLayout &DL = AP.getDataLayout();
+ AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
+ DL.getTypeStoreSize(CFP->getType()));
+}
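+// Example: an x86 80-bit long double bitcasts to an 80-bit APInt, so NumBytes
+// is 10 and TrailingBytes is 2; a little-endian target emits one 8-byte chunk
+// followed by a 2-byte chunk, then zero-fills the tail up to the alloc size
+// (typically 12 or 16 bytes, depending on the ABI alignment).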
+
+static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
+ const DataLayout &DL = AP.getDataLayout();
+ unsigned BitWidth = CI->getBitWidth();
+
+ // Copy the value as we may massage the layout for constants whose bit width
+ // is not a multiple of 64-bits.
+ APInt Realigned(CI->getValue());
+ uint64_t ExtraBits = 0;
+ unsigned ExtraBitsSize = BitWidth & 63;
+
+ if (ExtraBitsSize) {
+ // The bit width of the data is not a multiple of 64-bits.
+ // The extra bits are expected to be at the end of the chunk of the memory.
+ // Little endian:
+ // * Nothing to be done, just record the extra bits to emit.
+ // Big endian:
+ // * Record the extra bits to emit.
+ // * Realign the raw data to emit the chunks of 64-bits.
+ if (DL.isBigEndian()) {
+ // Basically the structure of the raw data is a chunk of 64-bits cells:
+ // 0 1 BitWidth / 64
+ // [chunk1][chunk2] ... [chunkN].
+ // The most significant chunk is chunkN and it should be emitted first.
+ // However, due to the alignment issue chunkN contains useless bits.
+ // Realign the chunks so that they contain only useful information:
+ // ExtraBits 0 1 (BitWidth / 64) - 1
+ // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
+ ExtraBits = Realigned.getRawData()[0] &
+ (((uint64_t)-1) >> (64 - ExtraBitsSize));
+ Realigned = Realigned.lshr(ExtraBitsSize);
+ } else
+ ExtraBits = Realigned.getRawData()[BitWidth / 64];
+ }
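+ // Example: a 68-bit constant has one full 64-bit chunk plus 4 extra bits.
+ // On a little-endian target the extra bits already sit in the top word; on
+ // a big-endian target the low 4 bits are saved into ExtraBits and the value
+ // is shifted right by 4 so the emitted 64-bit chunk holds the high 64 bits.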
+
+ // We don't expect assemblers to support integer data directives
+ // for more than 64 bits, so we emit the data in at most 64-bit
+ // quantities at a time.
+ const uint64_t *RawData = Realigned.getRawData();
+ for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+ uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i];
+ AP.OutStreamer->EmitIntValue(Val, 8);
+ }
+
+ if (ExtraBitsSize) {
+ // Emit the extra bits after the 64-bit chunks.
+
+ // Emit a directive that fills the expected size.
+ uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType());
+ Size -= (BitWidth / 64) * 8;
+ assert(Size && Size * 8 >= ExtraBitsSize &&
+ (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
+ == ExtraBits && "Directive too small for extra bits.");
+ AP.OutStreamer->EmitIntValue(ExtraBits, Size);
+ }
+}
+
+/// \brief Transform a non-absolute MCExpr containing a reference to a GOT
+/// equivalent global into a target-specific GOT pc-relative access to the
+/// final symbol.
+static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
+ const Constant *BaseCst,
+ uint64_t Offset) {
+ // The global @foo below illustrates a global that uses a got equivalent.
+ //
+ // @bar = global i32 42
+ // @gotequiv = private unnamed_addr constant i32* @bar
+ // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64),
+ // i64 ptrtoint (i32* @foo to i64))
+ // to i32)
+ //
+ // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually
+ // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the
+ // form:
+ //
+ // foo = cstexpr, where
+ // cstexpr := <gotequiv> - "." + <cst>
+ // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst>
+ //
+ // After canonicalization by evaluateAsRelocatable `ME` turns into:
+ //
+ // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ MCValue MV;
+ if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute())
+ return;
+ const MCSymbolRefExpr *SymA = MV.getSymA();
+ if (!SymA)
+ return;
+
+ // Check that GOT equivalent symbol is cached.
+ const MCSymbol *GOTEquivSym = &SymA->getSymbol();
+ if (!AP.GlobalGOTEquivs.count(GOTEquivSym))
+ return;
+
+ const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst);
+ if (!BaseGV)
+ return;
+
+ // Check for a valid base symbol
+ const MCSymbol *BaseSym = AP.getSymbol(BaseGV);
+ const MCSymbolRefExpr *SymB = MV.getSymB();
+
+ if (!SymB || BaseSym != &SymB->getSymbol())
+ return;
+
+ // Make sure to match:
+ //
+ // gotpcrelcst := <offset from @foo base> + <cst>
+ //
+ // If gotpcrelcst is positive it means that we can safely fold the pc rel
+ // displacement into the GOTPCREL. We can also have an extra offset <cst>
+ // if the target knows how to encode it.
+ //
+ int64_t GOTPCRelCst = Offset + MV.getConstant();
+ if (GOTPCRelCst < 0)
+ return;
+ if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
+ return;
+
+ // Emit the GOT PC relative to replace the got equivalent global, i.e.:
+ //
+ // bar:
+ // .long 42
+ // gotequiv:
+ // .quad bar
+ // foo:
+ // .long gotequiv - "." + <cst>
+ //
+ // is replaced by the target specific equivalent to:
+ //
+ // bar:
+ // .long 42
+ // foo:
+ // .long bar@GOTPCREL+<gotpcrelcst>
+ //
+ AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
+ const GlobalVariable *GV = Result.first;
+ int NumUses = (int)Result.second;
+ const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
+ const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
+ *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
+ FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
+
+ // Update GOT equivalent usage information
+ --NumUses;
+ if (NumUses >= 0)
+ AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
+}
+
+static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
+ AsmPrinter &AP, const Constant *BaseCV,
+ uint64_t Offset) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
+
+ // Globals with sub-elements such as combinations of arrays and structs
+ // are handled recursively by emitGlobalConstantImpl. Keep track of the
+ // constant symbol base and the current position with BaseCV and Offset.
+ if (!BaseCV && CV->hasOneUse())
+ BaseCV = dyn_cast<Constant>(CV->user_back());
+
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
+ return AP.OutStreamer->EmitZeros(Size);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ switch (Size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (AP.isVerbose())
+ AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
+ AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size);
+ return;
+ default:
+ emitGlobalConstantLargeInt(CI, AP);
+ return;
+ }
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+ return emitGlobalConstantFP(CFP, AP);
+
+ if (isa<ConstantPointerNull>(CV)) {
+ AP.OutStreamer->EmitIntValue(0, Size);
+ return;
+ }
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return emitGlobalConstantDataSequential(DL, CDS, AP);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return emitGlobalConstantImpl(DL, CE->getOperand(0), AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, DL);
+ if (New && New != CE)
+ return emitGlobalConstantImpl(DL, New, AP);
+ }
+ }
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return emitGlobalConstantVector(DL, V, AP);
+
+ // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
+ // through the streamer with EmitValue.
+ const MCExpr *ME = AP.lowerConstant(CV);
+
+ // Since lowerConstant already folded and got rid of all IR pointer and
+ // integer casts, detect GOT equivalent accesses by looking into the MCExpr
+ // directly.
+ if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel())
+ handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset);
+
+ AP.OutStreamer->EmitValue(ME, Size);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = DL.getTypeAllocSize(CV->getType());
+ if (Size)
+ emitGlobalConstantImpl(DL, CV, *this);
+ else if (MAI->hasSubsectionsViaSymbols()) {
+ // If the global has zero size, emit a single byte so that two labels don't
+ // look like they are at the same location.
+ OutStreamer->EmitIntValue(0, 1);
+ }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ // Target doesn't support this yet!
+ llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+ if (Offset > 0)
+ OS << '+' << Offset;
+ else if (Offset < 0)
+ OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
+ return OutContext.createTempSymbol(Name, true);
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+ return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+ return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "CPI" + Twine(getFunctionNumber()) + "_" +
+ Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+ return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set entry.
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+ const DataLayout &DL = getDataLayout();
+ return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "_" +
+ Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
+ StringRef Suffix) const {
+ return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang,
+ TM);
+}
+
+/// Return the MCSymbol for the specified ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout());
+ return OutContext.getOrCreateSymbol(NameStr);
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ if (!Loop) return;
+ PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+ OS.indent(Loop->getLoopDepth()*2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber()
+ << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+ unsigned FunctionNumber) {
+ // Add child loop information
+ for (const MachineLoop *CL : *Loop) {
+ OS.indent(CL->getLoopDepth()*2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth()
+ << '\n';
+ PrintChildLoopComment(OS, CL, FunctionNumber);
+ }
+}
+
+/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+ const MachineLoopInfo *LI,
+ const AsmPrinter &AP) {
+ // Add loop depth information
+ const MachineLoop *Loop = LI->getLoopFor(&MBB);
+ if (!Loop) return;
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ assert(Header && "No header for loop");
+
+ // If this block is not a loop header, just print out which block is the
+ // loop header and return.
+ if (Header != &MBB) {
+ AP.OutStreamer->AddComment(" in Loop: Header=BB" +
+ Twine(AP.getFunctionNumber())+"_" +
+ Twine(Loop->getHeader()->getNumber())+
+ " Depth="+Twine(Loop->getLoopDepth()));
+ return;
+ }
+
+ // Otherwise, it is a loop header. Print out information about child and
+ // parent loops.
+ raw_ostream &OS = AP.OutStreamer->GetCommentOS();
+
+ PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+ OS << "=>";
+ OS.indent(Loop->getLoopDepth()*2-2);
+
+ OS << "This ";
+ if (Loop->empty())
+ OS << "Inner ";
+ OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+ PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+ // End the previous funclet and start a new one.
+ if (MBB.isEHFuncletEntry()) {
+ for (const HandlerInfo &HI : Handlers) {
+ HI.Handler->endFunclet();
+ HI.Handler->beginFunclet(MBB);
+ }
+ }
+
+ // Emit an alignment directive for this block, if needed.
+ if (unsigned Align = MBB.getAlignment())
+ EmitAlignment(Align);
+
+ // If the block has its address taken, emit any labels that were used to
+ // reference the block. It is possible that there is more than one label
+ // here, because multiple LLVM BB's may have been RAUW'd to this block after
+ // the references were generated.
+ if (MBB.hasAddressTaken()) {
+ const BasicBlock *BB = MBB.getBasicBlock();
+ if (isVerbose())
+ OutStreamer->AddComment("Block address taken");
+
+ // MBBs can have their address taken as part of CodeGen without having
+ // their corresponding BB's address taken in IR
+ if (BB->hasAddressTaken())
+ for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
+ OutStreamer->EmitLabel(Sym);
+ }
+
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ BB->printAsOperand(OutStreamer->GetCommentOS(),
+ /*PrintType=*/false, BB->getModule());
+ OutStreamer->GetCommentOS() << '\n';
+ }
+ }
+ emitBasicBlockLoopComments(MBB, LI, *this);
+ }
+
+ // Print the main label for the block.
+ if (MBB.pred_empty() ||
+ (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
+ if (isVerbose()) {
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
+ }
+ } else {
+ OutStreamer->EmitLabel(MBB.getSymbol());
+ }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+ bool IsDefinition) const {
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ switch (Visibility) {
+ default: break;
+ case GlobalValue::HiddenVisibility:
+ if (IsDefinition)
+ Attr = MAI->getHiddenVisibilityAttr();
+ else
+ Attr = MAI->getHiddenDeclarationVisibilityAttr();
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Attr = MAI->getProtectedVisibilityAttr();
+ break;
+ }
+
+ if (Attr != MCSA_Invalid)
+ OutStreamer->EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isEHPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ if (MBB->pred_size() > 1)
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Check the terminators in the previous blocks
+ for (const auto &MI : Pred->terminators()) {
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.isBranch() || MI.isIndirectBranch())
+ return false;
+
+ // If this block is an operand of one of the branches, this is not a fall
+ // through. Note that targets with delay slots will usually bundle
+ // terminators with the delay slot instruction.
+ for (ConstMIBundleOperands OP(MI); OP.isValid(); ++OP) {
+ if (OP->isJTI())
+ return false;
+ if (OP->isMBB() && OP->getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
+ if (!S.usesMetadata())
+ return nullptr;
+
+ assert(!S.useStatepoints() && "statepoints do not currently support custom"
+ " stackmap formats, please see the documentation for a description of"
+ " the default format. If you really need a custom serialized format,"
+ " please file a bug");
+
+ gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+ gcp_map_type::iterator GCPI = GCMap.find(&S);
+ if (GCPI != GCMap.end())
+ return GCPI->second.get();
+
+ const char *Name = S.getName().c_str();
+
+ for (GCMetadataPrinterRegistry::iterator
+ I = GCMetadataPrinterRegistry::begin(),
+ E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+ if (strcmp(Name, I->getName()) == 0) {
+ std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate();
+ GMP->S = &S;
+ auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP)));
+ return IterBool.first->second.get();
+ }
+
+ report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+}
+
+/// Pin vtable to this file.
+AsmPrinterHandler::~AsmPrinterHandler() {}
+
+void AsmPrinterHandler::markFunctionEnd() {}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 000000000000..60f40d063cc8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,290 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ByteStreamer.h"
+#include "DwarfDebug.h"
+#include "DwarfExpression.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->EmitSLEB128IntValue(Value);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->EmitULEB128IntValue(Value, PadTo);
+}
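+// For reference, LEB128 packs 7 bits per byte with the high bit used as a
+// continuation flag: the unsigned value 624485 encodes to the bytes
+// 0xE5 0x8E 0x26, and the signed value -2 encodes to the single byte 0x7E.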
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+ switch (Encoding) {
+ case dwarf::DW_EH_PE_absptr:
+ return "absptr";
+ case dwarf::DW_EH_PE_omit:
+ return "omit";
+ case dwarf::DW_EH_PE_pcrel:
+ return "pcrel";
+ case dwarf::DW_EH_PE_udata4:
+ return "udata4";
+ case dwarf::DW_EH_PE_udata8:
+ return "udata8";
+ case dwarf::DW_EH_PE_sdata4:
+ return "sdata4";
+ case dwarf::DW_EH_PE_sdata8:
+ return "sdata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ return "pcrel udata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ return "pcrel sdata4";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ return "pcrel udata8";
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ return "pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata4:
+ return "indirect pcrel udata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4:
+ return "indirect pcrel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_udata8:
+ return "indirect pcrel udata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8:
+ return "indirect pcrel sdata8";
+ }
+
+ return "<unknown encoding>";
+}
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding. If verbose assembly output is enabled, we output comments
+/// describing the encoding. Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+ if (isVerbose()) {
+ if (Desc)
+ OutStreamer->AddComment(Twine(Desc) + " Encoding = " +
+ Twine(DecodeDWARFEncoding(Val)));
+ else
+ OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val));
+ }
+
+ OutStreamer->EmitIntValue(Val, 1);
+}
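+// Example: an LSDA encoding of 0x9b (DW_EH_PE_indirect | DW_EH_PE_pcrel |
+// DW_EH_PE_sdata4) is emitted as a single byte, annotated in verbose mode
+// with the comment "LSDA Encoding = indirect pcrel sdata4".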
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+ if (Encoding == dwarf::DW_EH_PE_omit)
+ return 0;
+
+ switch (Encoding & 0x07) {
+ default:
+ llvm_unreachable("Invalid encoded value.");
+ case dwarf::DW_EH_PE_absptr:
+ return MF->getDataLayout().getPointerSize();
+ case dwarf::DW_EH_PE_udata2:
+ return 2;
+ case dwarf::DW_EH_PE_udata4:
+ return 4;
+ case dwarf::DW_EH_PE_udata8:
+ return 8;
+ }
+}
+
+void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
+ unsigned Encoding) const {
+ if (GV) {
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+ const MCExpr *Exp =
+ TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI,
+ *OutStreamer);
+ OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
+ } else
+ OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
+ bool ForceOffset) const {
+ if (!ForceOffset) {
+ // On COFF targets, we have to emit the special .secrel32 directive.
+ if (MAI->needsDwarfSectionOffsetDirective()) {
+ OutStreamer->EmitCOFFSecRel32(Label);
+ return;
+ }
+
+ // If the format uses relocations with dwarf, refer to the symbol directly.
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ OutStreamer->EmitSymbolValue(Label, 4);
+ return;
+ }
+ }
+
+ // Otherwise, emit it as a label difference from the start of the section.
+ EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
+}
+
+void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
+ emitDwarfSymbolReference(S.getSymbol());
+ return;
+ }
+
+ // Just emit the offset directly; no need for symbol math.
+ EmitInt32(S.getOffset());
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
+ const MachineLocation &MLoc) const {
+ DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer);
+ const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
+ int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
+ if (Reg < 0) {
+ // We assume that pointers are always in an addressable register.
+ if (MLoc.isIndirect())
+ // FIXME: We have no reasonable way of handling errors in here. The
+ // caller might be in the middle of a dwarf expression. We should
+ // probably assert that Reg >= 0 once debug info generation is more
+ // mature.
+ return Expr.EmitOp(dwarf::DW_OP_nop,
+ "nop (could not find a dwarf register number)");
+
+ // Attempt to find a valid super- or sub-register.
+ if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(),
+ MLoc.getReg()))
+ Expr.EmitOp(dwarf::DW_OP_nop,
+ "nop (could not find a dwarf register number)");
+ return;
+ }
+
+ if (MLoc.isIndirect())
+ Expr.AddRegIndirect(Reg, MLoc.getOffset());
+ else
+ Expr.AddReg(Reg);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
+ switch (Inst.getOperation()) {
+ default:
+ llvm_unreachable("Unexpected instruction");
+ case MCCFIInstruction::OpDefCfaOffset:
+ OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpAdjustCfaOffset:
+ OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister());
+ break;
+ case MCCFIInstruction::OpOffset:
+ OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpRegister:
+ OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+ break;
+ case MCCFIInstruction::OpWindowSave:
+ OutStreamer->EmitCFIWindowSave();
+ break;
+ case MCCFIInstruction::OpSameValue:
+ OutStreamer->EmitCFISameValue(Inst.getRegister());
+ break;
+ case MCCFIInstruction::OpGnuArgsSize:
+ OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset());
+ break;
+ case MCCFIInstruction::OpEscape:
+ OutStreamer->EmitCFIEscape(Inst.getValues());
+ break;
+ }
+}
+
+void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
+ // Emit the code (index) for the abbreviation.
+ if (isVerbose())
+ OutStreamer->AddComment("Abbrev [" + Twine(Die.getAbbrevNumber()) + "] 0x" +
+ Twine::utohexstr(Die.getOffset()) + ":0x" +
+ Twine::utohexstr(Die.getSize()) + " " +
+ dwarf::TagString(Die.getTag()));
+ EmitULEB128(Die.getAbbrevNumber());
+
+ // Emit the DIE attribute values.
+ for (const auto &V : Die.values()) {
+ dwarf::Attribute Attr = V.getAttribute();
+ assert(V.getForm() && "Too many attributes for DIE (check abbreviation)");
+
+ if (isVerbose()) {
+ OutStreamer->AddComment(dwarf::AttributeString(Attr));
+ if (Attr == dwarf::DW_AT_accessibility)
+ OutStreamer->AddComment(
+ dwarf::AccessibilityString(V.getDIEInteger().getValue()));
+ }
+
+ // Emit an attribute using the defined form.
+ V.EmitValue(this);
+ }
+
+ // Emit the DIE children if any.
+ if (Die.hasChildren()) {
+ for (auto &Child : Die.children())
+ emitDwarfDIE(Child);
+
+ OutStreamer->AddComment("End Of Children Mark");
+ EmitInt8(0);
+ }
+}
+
+void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const {
+ // Emit the abbreviation code (a base-1 index).
+ EmitULEB128(Abbrev.getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev.Emit(this);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
new file mode 100644
index 000000000000..638226e90a7a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -0,0 +1,74 @@
+//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a generic interface for AsmPrinter handlers,
+// like debug and EH info emitters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MCSymbol;
+
+typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm);
+
+/// \brief Collects and handles AsmPrinter objects required to build debug
+/// or EH information.
+class AsmPrinterHandler {
+public:
+ virtual ~AsmPrinterHandler();
+
+ /// \brief For symbols that have a size designated (e.g. common symbols),
+ /// this tracks that size.
+ virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
+
+ /// \brief Emit all sections that should come after the content.
+ virtual void endModule() = 0;
+
+ /// \brief Gather pre-function debug information.
+ /// Every beginFunction(MF) call should be followed by an endFunction(MF)
+ /// call.
+ virtual void beginFunction(const MachineFunction *MF) = 0;
+
+ /// \brief Emit any end-of-function markers (like .cfi_endproc). This is
+ /// called before endFunction and cannot switch sections.
+ virtual void markFunctionEnd();
+
+ /// \brief Gather post-function debug information.
+ /// Please note that some AsmPrinter implementations may not call
+ /// beginFunction at all.
+ virtual void endFunction(const MachineFunction *MF) = 0;
+
+ virtual void beginFragment(const MachineBasicBlock *MBB,
+ ExceptionSymbolProvider ESP) {}
+ virtual void endFragment() {}
+
+ /// \brief Emit target-specific EH funclet machinery.
+ virtual void beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym = nullptr) {}
+ virtual void endFunclet() {}
+
+ /// \brief Process beginning of an instruction.
+ virtual void beginInstruction(const MachineInstr *MI) = 0;
+
+ /// \brief Process end of an instruction.
+ virtual void endInstruction() = 0;
+};
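+// Usage sketch (illustrative): a concrete handler such as a debug-info or EH
+// emitter overrides the pure virtual hooks above and is registered in the
+// AsmPrinter's handler list, so its callbacks fire at module, function and
+// instruction emission boundaries.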
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 000000000000..2ce6c182235f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,578 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+ const MCTargetOptions &MCOptions,
+ const MDNode *LocMDNode,
+ InlineAsm::AsmDialect Dialect) const {
+ assert(!Str.empty() && "Can't emit empty inline asm block");
+
+ // Remember if the buffer is nul terminated or not so we can avoid a copy.
+ bool isNullTerminated = Str.back() == 0;
+ if (isNullTerminated)
+ Str = Str.substr(0, Str.size()-1);
+
+ // If the output streamer does not have mature MC support or the integrated
+ // assembler has been disabled, just emit the blob textually.
+ // Otherwise parse the asm and emit it via MC support.
+ // This is useful in case the asm parser doesn't handle something but the
+ // system assembler does.
+ const MCAsmInfo *MCAI = TM.getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ if (!MCAI->useIntegratedAssembler() &&
+ !OutStreamer->isIntegratedAssemblerRequired()) {
+ emitInlineAsmStart();
+ OutStreamer->EmitRawText(Str);
+ emitInlineAsmEnd(STI, nullptr);
+ return;
+ }
+
+ SourceMgr SrcMgr;
+ SrcMgrDiagInfo DiagInfo;
+
+ // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ bool HasDiagHandler = false;
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) {
+ // If the source manager has an issue, we arrange for srcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
+ HasDiagHandler = true;
+ }
+
+ std::unique_ptr<MemoryBuffer> Buffer;
+ if (isNullTerminated)
+ Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+ else
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+ // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+ SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI));
+
+ // We create a new MCInstrInfo here since we might be at the module level
+ // and not have a MachineFunction to initialize the TargetInstrInfo from and
+ // we only need MCInstrInfo for asm parsing. We create one unconditionally
+ // because it's not subtarget dependent.
+ std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
+ std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
+ STI, *Parser, *MII, MCOptions));
+ if (!TAP)
+ report_fatal_error("Inline asm not supported by this streamer because"
+ " we don't have an asm parser for this target\n");
+ Parser->setAssemblerDialect(Dialect);
+ Parser->setTargetParser(*TAP.get());
+ if (MF) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
+ }
+
+ emitInlineAsmStart();
+ // Don't implicitly switch to the text section before the asm.
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
+ emitInlineAsmEnd(STI, &TAP->getSTI());
+ if (Res && !HasDiagHandler)
+ report_fatal_error("Error parsing inline asm\n");
+}
+
+static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
+
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$':
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ }
+ if (Done) break;
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ nullptr, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ /*Modifier*/ nullptr, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ break;
+ }
+ }
+ }
+ OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
+}
+
+static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, int InlineAsmVariant,
+ int AsmPrinterVariant, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ const char *LastEmitted = AsmStr; // One past the last character emitted.
+ unsigned NumOperands = MI->getNumOperands();
+
+ OS << '\t';
+
+ while (*LastEmitted) {
+ switch (*LastEmitted) {
+ default: {
+ // Not a special case, emit the string section literally.
+ const char *LiteralEnd = LastEmitted+1;
+ while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+ *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+ ++LiteralEnd;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ LastEmitted = LiteralEnd;
+ break;
+ }
+ case '\n':
+ ++LastEmitted; // Consume newline character.
+ OS << '\n'; // Indent code with newline.
+ break;
+ case '$': {
+ ++LastEmitted; // Consume '$' character.
+ bool Done = true;
+
+ // Handle escapes.
+ switch (*LastEmitted) {
+ default: Done = false; break;
+ case '$': // $$ -> $
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
+ ++LastEmitted; // Consume second '$' character.
+ break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // this is gcc's behavior for | outside a variant
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // this is gcc's behavior for } outside a variant
+ else
+ CurVariant = -1;
+ break;
+ }
+ if (Done) break;
+
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (HasCurlyBraces && *LastEmitted == ':') {
+ ++LastEmitted;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (!StrEnd)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ AP->PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
+ const char *IDStart = LastEmitted;
+ const char *IDEnd = IDStart;
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+ unsigned Val;
+ if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+ report_fatal_error("Bad $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ LastEmitted = IDEnd;
+
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
+ if (Val >= NumOperands-1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ // Okay, we finally have a value number. Ask the target to print this
+ // operand!
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+ bool Error = false;
+
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands()) break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
+ } else {
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ if (Modifier[0] == 'l') { // Labels are target independent.
+ // FIXME: What if the operand isn't an MBB, report error?
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else {
+ if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
+ Modifier[0] ? Modifier : nullptr,
+ OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ Modifier[0] ? Modifier : nullptr, OS);
+ }
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+ }
+ }
+ break;
+ }
+ }
+ }
+ OS << '\n' << (char)0; // null terminate string.
+}
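+// Example (GCC/AT&T dialect): with AsmPrinterVariant 0, the asm string
+// "mov$( l$| q$) $0" keeps only the text of the first $(...$)-delimited
+// alternative, "$$" prints a literal '$', and a "${0:c}"-style reference
+// forwards the modifier character to the target's PrintAsmOperand.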
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+ assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+ // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+ // If this asm string is empty, just print the #APP/#NOAPP markers.
+ // These are useful to see where empty asm blocks wound up.
+ if (AsmStr[0] == 0) {
+ OutStreamer->emitRawComment(MAI->getInlineAsmStart());
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
+ return;
+ }
+
+ // Emit the #APP start marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use emitRawComment.
+ OutStreamer->emitRawComment(MAI->getInlineAsmStart());
+
+ // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+ // it.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = nullptr;
+ for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ // Emit the inline asm to a temporary string so we can emit it through
+ // EmitInlineAsm.
+ SmallString<256> StringData;
+ raw_svector_ostream OS(StringData);
+
+ // The variant of the current asmprinter.
+ int AsmPrinterVariant = MAI->getAssemblerDialect();
+ InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
+ AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+ if (InlineAsmVariant == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
+ AP, LocCookie, OS);
+ else
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
+
+ // Reset SanitizeAddress based on the function's attribute.
+ MCTargetOptions MCOptions = TM.Options.MCOptions;
+ MCOptions.SanitizeAddress =
+ MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress);
+
+ EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
+ MI->getInlineAsmDialect());
+
+ // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+ // enabled, so we use emitRawComment.
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd());
+}
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+ const char *Code) const {
+ if (!strcmp(Code, "private")) {
+ const DataLayout &DL = MF->getDataLayout();
+ OS << DL.getPrivateGlobalPrefix();
+ } else if (!strcmp(Code, "comment")) {
+ OS << MAI->getCommentString();
+ } else if (!strcmp(Code, "uid")) {
+ // Comparing the address of MI isn't sufficient, because machineinstrs may
+ // be allocated to the same address across functions.
+
+ // If this is a new LastFn instruction, bump the counter.
+ if (LastMI != MI || LastFn != getFunctionNumber()) {
+ ++Counter;
+ LastMI = MI;
+ LastFn = getFunctionNumber();
+ }
+ OS << Counter;
+ } else {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Unknown special formatter '" << Code
+ << "' for machine instr: " << *MI;
+ report_fatal_error(Msg.str());
+ }
+}
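+// Example: an asm string containing "${:comment} spill" expands to the
+// target's comment marker (e.g. '#' or ';') followed by " spill", and
+// "${:uid}" expands to a counter that is unique per inline-asm instruction
+// within the current function.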
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'c': // Substitute immediate value without immediate syntax
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << MO.getImm();
+ return false;
+ case 'n': // Negate the immediate constant.
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << -MO.getImm();
+ return false;
+ case 's': // The GCC deprecated s modifier
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << ((32 - MO.getImm()) & 31);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Target doesn't support this yet!
+ return true;
+}
+
+void AsmPrinter::emitInlineAsmStart() const {}
+
+void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+ const MCSubtargetInfo *EndInfo) const {}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
new file mode 100644
index 000000000000..aaf6180c9404
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -0,0 +1,110 @@
+//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a class that can take bytes that would normally be
+// streamed via the AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H
+
+#include "DIEHash.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include <string>
+
+namespace llvm {
+class ByteStreamer {
+ protected:
+ ~ByteStreamer() = default;
+ ByteStreamer(const ByteStreamer&) = default;
+ ByteStreamer() = default;
+
+ public:
+ // For now we're just handling the calls we need for dwarf emission/hashing.
+ virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
+ virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+ virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+};
+
+class APByteStreamer final : public ByteStreamer {
+private:
+ AsmPrinter &AP;
+
+public:
+ APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
+ void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ AP.OutStreamer->AddComment(Comment);
+ AP.EmitInt8(Byte);
+ }
+ void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ AP.OutStreamer->AddComment(Comment);
+ AP.EmitSLEB128(DWord);
+ }
+ void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ AP.OutStreamer->AddComment(Comment);
+ AP.EmitULEB128(DWord);
+ }
+};
+
+class HashingByteStreamer final : public ByteStreamer {
+ private:
+ DIEHash &Hash;
+ public:
+ HashingByteStreamer(DIEHash &H) : Hash(H) {}
+ void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ Hash.update(Byte);
+ }
+ void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ Hash.addSLEB128(DWord);
+ }
+ void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ Hash.addULEB128(DWord);
+ }
+};
+
+class BufferByteStreamer final : public ByteStreamer {
+private:
+ SmallVectorImpl<char> &Buffer;
+ SmallVectorImpl<std::string> &Comments;
+
+ /// \brief Only verbose textual output needs comments. This will be set to
+ /// true for that case, and false otherwise. If false, comments passed in to
+ /// the emit methods will be ignored.
+ bool GenerateComments;
+
+public:
+ BufferByteStreamer(SmallVectorImpl<char> &Buffer,
+ SmallVectorImpl<std::string> &Comments,
+ bool GenerateComments)
+ : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {}
+ void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ Buffer.push_back(Byte);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+ void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeSLEB128(DWord, OSE);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+ void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeULEB128(DWord, OSE);
+ if (GenerateComments)
+ Comments.push_back(Comment.str());
+ }
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
new file mode 100644
index 000000000000..b0ba57122206
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -0,0 +1,2076 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Microsoft CodeView debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeViewDebug.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/DebugInfo/CodeView/ByteStream.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/Line.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeDumper.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) {
+ // If the module doesn't have named metadata anchors or the COFF debug
+ // section is not available, skip any debug-info-related work.
+ if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
+ !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ Asm = nullptr;
+ return;
+ }
+
+ // Tell MMI that we have debug info.
+ MMI->setDebugInfoAvailability(true);
+}
+
+StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
+ std::string &Filepath = FileToFilepathMap[File];
+ if (!Filepath.empty())
+ return Filepath;
+
+ StringRef Dir = File->getDirectory(), Filename = File->getFilename();
+
+ // Clang emits directory and relative filename info into the IR, but CodeView
+ // operates on full paths. We could change Clang to emit full paths too, but
+ // that would increase the IR size and is probably not needed for other
+ // users. For now, just concatenate and canonicalize the path here.
+ if (Filename.find(':') == 1)
+ Filepath = Filename;
+ else
+ Filepath = (Dir + "\\" + Filename).str();
+
+ // Canonicalize the path. We have to do it textually because we may no longer
+ // have access to the file in the filesystem.
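+ // e.g. "C:/foo/./bar/../baz.cpp" canonicalizes to "C:\foo\baz.cpp".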
+ // First, replace all slashes with backslashes.
+ std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
+
+ // Replace all "\.\" with "\".
+ size_t Cursor = 0;
+ while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
+ Filepath.erase(Cursor, 2);
+
+ // Replace all "\XXX\..\" with "\". Don't try too hard though as the original
+ // path should be well-formatted, e.g. start with a drive letter, etc.
+ Cursor = 0;
+ while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
+ // Something's wrong if the path starts with "\..\", abort.
+ if (Cursor == 0)
+ break;
+
+ size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
+ if (PrevSlash == std::string::npos)
+ // Something's wrong, abort.
+ break;
+
+ Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
+ // The next ".." might be following the one we've just erased.
+ Cursor = PrevSlash;
+ }
+
+ // Remove all duplicate backslashes.
+ Cursor = 0;
+ while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
+ Filepath.erase(Cursor, 1);
+
+ return Filepath;
+}
+
+unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
+ unsigned NextId = FileIdMap.size() + 1;
+ auto Insertion = FileIdMap.insert(std::make_pair(F, NextId));
+ if (Insertion.second) {
+ // We have to compute the full filepath and emit a .cv_file directive.
+ StringRef FullPath = getFullFilepath(F);
+ NextId = OS.EmitCVFileDirective(NextId, FullPath);
+ assert(NextId == FileIdMap.size() && ".cv_file directive failed");
+ }
+ return Insertion.first->second;
+}
+
+CodeViewDebug::InlineSite &
+CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
+ const DISubprogram *Inlinee) {
+ auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
+ InlineSite *Site = &SiteInsertion.first->second;
+ if (SiteInsertion.second) {
+ Site->SiteFuncId = NextFuncId++;
+ Site->Inlinee = Inlinee;
+ InlinedSubprograms.insert(Inlinee);
+ getFuncIdForSubprogram(Inlinee);
+ }
+ return *Site;
+}
+
+static StringRef getPrettyScopeName(const DIScope *Scope) {
+ StringRef ScopeName = Scope->getName();
+ if (!ScopeName.empty())
+ return ScopeName;
+
+ switch (Scope->getTag()) {
+ case dwarf::DW_TAG_enumeration_type:
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ return "<unnamed-tag>";
+ case dwarf::DW_TAG_namespace:
+ return "`anonymous namespace'";
+ }
+
+ return StringRef();
+}
+
+static const DISubprogram *getQualifiedNameComponents(
+ const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) {
+ const DISubprogram *ClosestSubprogram = nullptr;
+ while (Scope != nullptr) {
+ if (ClosestSubprogram == nullptr)
+ ClosestSubprogram = dyn_cast<DISubprogram>(Scope);
+ StringRef ScopeName = getPrettyScopeName(Scope);
+ if (!ScopeName.empty())
+ QualifiedNameComponents.push_back(ScopeName);
+ Scope = Scope->getScope().resolve();
+ }
+ return ClosestSubprogram;
+}
+
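+// Join the scope components outermost-first (hence the reverse below) with
+// "::" and append the type name; e.g. a type T declared inside
+// 'namespace N { struct S { ... }; }' yields "N::S::T".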
+static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents,
+ StringRef TypeName) {
+ std::string FullyQualifiedName;
+ for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) {
+ FullyQualifiedName.append(QualifiedNameComponent);
+ FullyQualifiedName.append("::");
+ }
+ FullyQualifiedName.append(TypeName);
+ return FullyQualifiedName;
+}
+
+static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) {
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ getQualifiedNameComponents(Scope, QualifiedNameComponents);
+ return getQualifiedName(QualifiedNameComponents, Name);
+}
+
+struct CodeViewDebug::TypeLoweringScope {
+ TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; }
+ ~TypeLoweringScope() {
+ // Don't decrement TypeEmissionLevel until after emitting deferred types, so
+ // inner TypeLoweringScopes don't attempt to emit deferred types.
+ if (CVD.TypeEmissionLevel == 1)
+ CVD.emitDeferredCompleteTypes();
+ --CVD.TypeEmissionLevel;
+ }
+ CodeViewDebug &CVD;
+};
+
+static std::string getFullyQualifiedName(const DIScope *Ty) {
+ const DIScope *Scope = Ty->getScope().resolve();
+ return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
+}
+
+TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
+ // No scope means global scope and that uses the zero index.
+ if (!Scope || isa<DIFile>(Scope))
+ return TypeIndex();
+
+ assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
+
+ // Check if we've already translated this scope.
+ auto I = TypeIndices.find({Scope, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Build the fully qualified name of the scope.
+ std::string ScopeName = getFullyQualifiedName(Scope);
+ TypeIndex TI =
+ TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName));
+ return recordTypeIndexForDINode(Scope, TI);
+}
+
+TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
+ // It's possible to ask for the FuncId of a function which doesn't have a
+ // subprogram: inlining a function with debug info into a function with none.
+ if (!SP)
+ return TypeIndex::None();
+
+ // Check if we've already translated this subprogram.
+ auto I = TypeIndices.find({SP, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // The display name includes function template arguments. Drop them to match
+ // MSVC.
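+ // e.g. "f<int>" is recorded simply as "f".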
+ StringRef DisplayName = SP->getDisplayName().split('<').first;
+
+ const DIScope *Scope = SP->getScope().resolve();
+ TypeIndex TI;
+ if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) {
+ // If the scope is a DICompositeType, then this must be a method. Member
+ // function types take some special handling, and require access to the
+ // subprogram.
+ TypeIndex ClassType = getTypeIndex(Class);
+ MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
+ DisplayName);
+ TI = TypeTable.writeMemberFuncId(MFuncId);
+ } else {
+ // Otherwise, this must be a free function.
+ TypeIndex ParentScope = getScopeIndex(Scope);
+ FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
+ TI = TypeTable.writeFuncId(FuncId);
+ }
+
+ return recordTypeIndexForDINode(SP, TI);
+}
+
+TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP,
+ const DICompositeType *Class) {
+ // Always use the method declaration as the key for the function type. The
+ // method declaration contains the this adjustment.
+ if (SP->getDeclaration())
+ SP = SP->getDeclaration();
+ assert(!SP->getDeclaration() && "should use declaration as key");
+
+ // Key the MemberFunctionRecord into the map as {SP, Class}. It won't collide
+ // with the MemberFuncIdRecord, which is keyed in as {SP, nullptr}.
+ auto I = TypeIndices.find({SP, Class});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Make sure complete type info for the class is emitted *after* the member
+ // function type, as the complete class type is likely to reference this
+ // member function type.
+ TypeLoweringScope S(*this);
+ TypeIndex TI =
+ lowerTypeMemberFunction(SP->getType(), Class, SP->getThisAdjustment());
+ return recordTypeIndexForDINode(SP, TI, Class);
+}
+
+TypeIndex CodeViewDebug::recordTypeIndexForDINode(const DINode *Node,
+ TypeIndex TI,
+ const DIType *ClassTy) {
+ auto InsertResult = TypeIndices.insert({{Node, ClassTy}, TI});
+ (void)InsertResult;
+ assert(InsertResult.second && "DINode was already assigned a type index");
+ return TI;
+}
+
+unsigned CodeViewDebug::getPointerSizeInBytes() {
+ return MMI->getModule()->getDataLayout().getPointerSizeInBits() / 8;
+}
+
+void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
+ const DILocation *InlinedAt) {
+ if (InlinedAt) {
+ // This variable was inlined. Associate it with the InlineSite.
+ const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();
+ InlineSite &Site = getInlineSite(InlinedAt, Inlinee);
+ Site.InlinedLocals.emplace_back(Var);
+ } else {
+ // This variable goes in the main ProcSym.
+ CurFn->Locals.emplace_back(Var);
+ }
+}
+
+static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
+ const DILocation *Loc) {
+ auto B = Locs.begin(), E = Locs.end();
+ if (std::find(B, E, Loc) == E)
+ Locs.push_back(Loc);
+}
+
+void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
+ const MachineFunction *MF) {
+ // Skip this instruction if it has the same location as the previous one.
+ if (DL == CurFn->LastLoc)
+ return;
+
+ const DIScope *Scope = DL.get()->getScope();
+ if (!Scope)
+ return;
+
+ // Skip this line if its number is larger than the maximum we can record.
+ LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
+ if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
+ LI.isNeverStepInto())
+ return;
+
+ ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
+ if (CI.getStartColumn() != DL.getCol())
+ return;
+
+ if (!CurFn->HaveLineInfo)
+ CurFn->HaveLineInfo = true;
+ unsigned FileId = 0;
+ if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ FileId = CurFn->LastFileId;
+ else
+ FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
+ CurFn->LastLoc = DL;
+
+ unsigned FuncId = CurFn->FuncId;
+ if (const DILocation *SiteLoc = DL->getInlinedAt()) {
+ const DILocation *Loc = DL.get();
+
+ // If this location was actually inlined from somewhere else, give it the ID
+ // of the inline call site.
+ FuncId =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram()).SiteFuncId;
+
+ // Ensure we have links in the tree of inline call sites.
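+ // For example, if A was inlined into B and B into this function, B's call
+ // site becomes a direct child of the function and A's call site a child of
+ // B's inline site.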
+ bool FirstLoc = true;
+ while ((SiteLoc = Loc->getInlinedAt())) {
+ InlineSite &Site =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram());
+ if (!FirstLoc)
+ addLocIfNotPresent(Site.ChildSites, Loc);
+ FirstLoc = false;
+ Loc = SiteLoc;
+ }
+ addLocIfNotPresent(CurFn->ChildSites, Loc);
+ }
+
+ OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
+ /*PrologueEnd=*/false,
+ /*IsStmt=*/false, DL->getFilename());
+}
+
+void CodeViewDebug::emitCodeViewMagicVersion() {
+ OS.EmitValueToAlignment(4);
+ OS.AddComment("Debug section magic");
+ OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4);
+}
+
+void CodeViewDebug::endModule() {
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ assert(Asm != nullptr);
+
+ // The COFF .debug$S section consists of several subsections, each starting
+ // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+ // of the payload followed by the payload itself. The subsections are 4-byte
+ // aligned.
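+ // Schematically:
+ //   0x4                                     (debug section magic, once)
+ //   [kind: 4 bytes][length: 4 bytes][payload...]
+ //   <pad to 4-byte alignment>
+ //   [kind][length][payload...] ...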
+
+ // Use the generic .debug$S section, and make a subsection for all the inlined
+ // subprograms.
+ switchToDebugSectionForSymbol(nullptr);
+ emitInlineeLinesSubsection();
+
+ // Emit per-function debug information.
+ for (auto &P : FnDebugInfo)
+ if (!P.first->isDeclarationForLinker())
+ emitDebugInfoForFunction(P.first, P.second);
+
+ // Emit global variable debug information.
+ setCurrentSubprogram(nullptr);
+ emitDebugInfoForGlobals();
+
+ // Emit retained types.
+ emitDebugInfoForRetainedTypes();
+
+ // Switch back to the generic .debug$S section after potentially processing
+ // comdat symbol sections.
+ switchToDebugSectionForSymbol(nullptr);
+
+ // Emit UDT records for any types used by global variables.
+ if (!GlobalUDTs.empty()) {
+ MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitDebugInfoForUDTs(GlobalUDTs);
+ endCVSubsection(SymbolsEnd);
+ }
+
+ // This subsection holds the mapping from file index to offset in the string
+ // table.
+ OS.AddComment("File index to string table offset subsection");
+ OS.EmitCVFileChecksumsDirective();
+
+ // This subsection holds the string table.
+ OS.AddComment("String table");
+ OS.EmitCVStringTableDirective();
+
+ // Emit type information last, so that any types we translate while emitting
+ // function info are included.
+ emitTypeInformation();
+
+ clear();
+}
+
+static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
+ // Microsoft's linker seems to have trouble with symbol names longer than
+ // 0xffd8 bytes.
+ S = S.substr(0, 0xffd8);
+ SmallString<32> NullTerminatedString(S);
+ NullTerminatedString.push_back('\0');
+ OS.EmitBytes(NullTerminatedString);
+}
+
+void CodeViewDebug::emitTypeInformation() {
+ // Do nothing if we have no debug info or if no non-trivial types were emitted
+ // to TypeTable during codegen.
+ NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes)
+ return;
+ if (TypeTable.empty())
+ return;
+
+ // Start the .debug$T section with 0x4.
+ OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
+ emitCodeViewMagicVersion();
+
+ SmallString<8> CommentPrefix;
+ if (OS.isVerboseAsm()) {
+ CommentPrefix += '\t';
+ CommentPrefix += Asm->MAI->getCommentString();
+ CommentPrefix += ' ';
+ }
+
+ CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false);
+ TypeTable.ForEachRecord(
+ [&](TypeIndex Index, StringRef Record) {
+ if (OS.isVerboseAsm()) {
+ // Emit a block comment describing the type record for readability.
+ SmallString<512> CommentBlock;
+ raw_svector_ostream CommentOS(CommentBlock);
+ ScopedPrinter SP(CommentOS);
+ SP.setPrefix(CommentPrefix);
+ CVTD.setPrinter(&SP);
+ Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()});
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+ // emitRawComment will insert its own tab and comment string before
+ // the first line, so strip off our first one. It also prints its own
+ // newline.
+ OS.emitRawComment(
+ CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
+ } else {
+#ifndef NDEBUG
+ // Assert that the type data is valid even if we aren't dumping
+ // comments. The MSVC linker doesn't do much type record validation,
+ // so the first link of an invalid type record can succeed while
+ // subsequent links will fail with LNK1285.
+ ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()});
+ CVTypeArray Types;
+ StreamReader Reader(Stream);
+ Error E = Reader.readArray(Types, Reader.getLength());
+ if (!E) {
+ TypeVisitorCallbacks C;
+ E = CVTypeVisitor(C).visitTypeStream(Types);
+ }
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+#endif
+ }
+ OS.EmitBinaryData(Record);
+ });
+}
+
+void CodeViewDebug::emitInlineeLinesSubsection() {
+ if (InlinedSubprograms.empty())
+ return;
+
+ OS.AddComment("Inlinee lines subsection");
+ MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines);
+
+ // We don't provide any extra file info.
+ // FIXME: Find out if debuggers use this info.
+ OS.AddComment("Inlinee lines signature");
+ OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4);
+
+ for (const DISubprogram *SP : InlinedSubprograms) {
+ assert(TypeIndices.count({SP, nullptr}));
+ TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}];
+
+ OS.AddBlankLine();
+ unsigned FileId = maybeRecordFile(SP->getFile());
+ OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " +
+ SP->getFilename() + Twine(':') + Twine(SP->getLine()));
+ OS.AddBlankLine();
+ // The filechecksum table uses 8 byte entries for now, and file ids start at
+ // 1.
+ unsigned FileOffset = (FileId - 1) * 8;
+ OS.AddComment("Type index of inlined function");
+ OS.EmitIntValue(InlineeIdx.getIndex(), 4);
+ OS.AddComment("Offset into filechecksum table");
+ OS.EmitIntValue(FileOffset, 4);
+ OS.AddComment("Starting line number");
+ OS.EmitIntValue(SP->getLine(), 4);
+ }
+
+ endCVSubsection(InlineEnd);
+}
+
+void CodeViewDebug::collectInlineSiteChildren(
+ SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI,
+ const InlineSite &Site) {
+ for (const DILocation *ChildSiteLoc : Site.ChildSites) {
+ auto I = FI.InlineSites.find(ChildSiteLoc);
+ const InlineSite &ChildSite = I->second;
+ Children.push_back(ChildSite.SiteFuncId);
+ collectInlineSiteChildren(Children, FI, ChildSite);
+ }
+}
+
+void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
+ const DILocation *InlinedAt,
+ const InlineSite &Site) {
+ MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(),
+ *InlineEnd = MMI->getContext().createTempSymbol();
+
+ assert(TypeIndices.count({Site.Inlinee, nullptr}));
+ TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}];
+
+ // SymbolRecord
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength
+ OS.EmitLabel(InlineBegin);
+ OS.AddComment("Record kind: S_INLINESITE");
+ OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind
+
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Inlinee type index");
+ OS.EmitIntValue(InlineeIdx.getIndex(), 4);
+
+ unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
+ unsigned StartLineNum = Site.Inlinee->getLine();
+ SmallVector<unsigned, 3> SecondaryFuncIds;
+ collectInlineSiteChildren(SecondaryFuncIds, FI, Site);
+
+ OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
+ FI.Begin, FI.End, SecondaryFuncIds);
+
+ OS.EmitLabel(InlineEnd);
+
+ emitLocalVariableList(Site.InlinedLocals);
+
+ // Recurse on child inlined call sites before closing the scope.
+ for (const DILocation *ChildSite : Site.ChildSites) {
+ auto I = FI.InlineSites.find(ChildSite);
+ assert(I != FI.InlineSites.end() &&
+ "child site not in function inline site map");
+ emitInlinedCallSite(FI, ChildSite, I->second);
+ }
+
+ // Close the scope.
+ OS.AddComment("Record length");
+ OS.EmitIntValue(2, 2); // RecordLength
+ OS.AddComment("Record kind: S_INLINESITE_END");
+ OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind
+}
+
+void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
+ // If we have a symbol, it may be in a section that is COMDAT. If so, find the
+ // comdat key. A section may be comdat because of -ffunction-sections or
+ // because it is comdat in the IR.
+ MCSectionCOFF *GVSec =
+ GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr;
+ const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr;
+
+ MCSectionCOFF *DebugSec = cast<MCSectionCOFF>(
+ Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+ DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
+
+ OS.SwitchSection(DebugSec);
+
+ // Emit the magic version number if this is the first time we've switched to
+ // this section.
+ if (ComdatDebugSections.insert(DebugSec).second)
+ emitCodeViewMagicVersion();
+}
+
+void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
+ FunctionInfo &FI) {
+ // For each function there is a separate subsection
+ // which holds the PC to file:line table.
+ const MCSymbol *Fn = Asm->getSymbol(GV);
+ assert(Fn);
+
+ // Switch to a comdat section, if appropriate.
+ switchToDebugSectionForSymbol(Fn);
+
+ std::string FuncName;
+ auto *SP = GV->getSubprogram();
+ setCurrentSubprogram(SP);
+
+ // If we have a display name, build the fully qualified name by walking the
+ // chain of scopes.
+ if (SP != nullptr && !SP->getDisplayName().empty())
+ FuncName =
+ getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
+
+ // If our DISubprogram name is empty, use the mangled name.
+ if (FuncName.empty())
+ FuncName = GlobalValue::getRealLinkageName(GV->getName());
+
+ // Emit a symbol subsection, required by VS2012+ to find function boundaries.
+ OS.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ {
+ MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
+ *ProcRecordEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
+ OS.EmitLabel(ProcRecordBegin);
+
+ if (GV->hasLocalLinkage()) {
+ OS.AddComment("Record kind: S_LPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
+ } else {
+ OS.AddComment("Record kind: S_GPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
+ }
+
+ // These fields are filled in by tools like CVPACK which run after the fact.
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrNext");
+ OS.EmitIntValue(0, 4);
+ // This is the important bit that tells the debugger where the function
+ // code is located and how large it is:
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4);
+ OS.AddComment("Offset after prologue");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Offset before epilogue");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Function type index");
+ OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4);
+ OS.AddComment("Function section relative address");
+ OS.EmitCOFFSecRel32(Fn);
+ OS.AddComment("Function section index");
+ OS.EmitCOFFSectionIndex(Fn);
+ OS.AddComment("Flags");
+ OS.EmitIntValue(0, 1);
+ // Emit the function display name as a null-terminated string.
+ OS.AddComment("Function name");
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, FuncName);
+ OS.EmitLabel(ProcRecordEnd);
+
+ emitLocalVariableList(FI.Locals);
+
+ // Emit inlined call site information. Only emit functions inlined directly
+ // into the parent function. We'll emit the other sites recursively as part
+ // of their parent inline site.
+ for (const DILocation *InlinedAt : FI.ChildSites) {
+ auto I = FI.InlineSites.find(InlinedAt);
+ assert(I != FI.InlineSites.end() &&
+ "child site not in function inline site map");
+ emitInlinedCallSite(FI, InlinedAt, I->second);
+ }
+
+ if (SP != nullptr)
+ emitDebugInfoForUDTs(LocalUDTs);
+
+ // We're done with this function.
+ OS.AddComment("Record length");
+ OS.EmitIntValue(0x0002, 2);
+ OS.AddComment("Record kind: S_PROC_ID_END");
+ OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2);
+ }
+ endCVSubsection(SymbolsEnd);
+
+ // We have an assembler directive that takes care of the whole line table.
+ OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End);
+}
+
+CodeViewDebug::LocalVarDefRange
+CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
+ LocalVarDefRange DR;
+ DR.InMemory = -1;
+ DR.DataOffset = Offset;
+ assert(DR.DataOffset == Offset && "truncation");
+ DR.StructOffset = 0;
+ DR.CVRegister = CVRegister;
+ return DR;
+}
+
+CodeViewDebug::LocalVarDefRange
+CodeViewDebug::createDefRangeReg(uint16_t CVRegister) {
+ LocalVarDefRange DR;
+ DR.InMemory = 0;
+ DR.DataOffset = 0;
+ DR.StructOffset = 0;
+ DR.CVRegister = CVRegister;
+ return DR;
+}
+
+void CodeViewDebug::collectVariableInfoFromMMITable(
+ DenseSet<InlinedVariable> &Processed) {
+ const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget();
+ const TargetFrameLowering *TFI = TSI.getFrameLowering();
+ const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
+
+ for (const MachineModuleInfo::VariableDbgInfo &VI :
+ MMI->getVariableDbgInfo()) {
+ if (!VI.Var)
+ continue;
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt()));
+ LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
+
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ // Get the frame register used and the offset.
+ unsigned FrameReg = 0;
+ int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
+ uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
+
+ // Calculate the label ranges.
+ LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset);
+ for (const InsnRange &Range : Scope->getRanges()) {
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ End = End ? End : Asm->getFunctionEnd();
+ DefRange.Ranges.emplace_back(Begin, End);
+ }
+
+ LocalVariable Var;
+ Var.DIVar = VI.Var;
+ Var.DefRanges.emplace_back(std::move(DefRange));
+ recordLocalVariable(std::move(Var), VI.Loc->getInlinedAt());
+ }
+}
+
+void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
+ DenseSet<InlinedVariable> Processed;
+ // Grab the variable info that was squirreled away in the MMI side-table.
+ collectVariableInfoFromMMITable(Processed);
+
+ const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
+
+ for (const auto &I : DbgValues) {
+ InlinedVariable IV = I.first;
+ if (Processed.count(IV))
+ continue;
+ const DILocalVariable *DIVar = IV.first;
+ const DILocation *InlinedAt = IV.second;
+
+ // Instruction ranges, specifying where IV is accessible.
+ const auto &Ranges = I.second;
+
+ LexicalScope *Scope = nullptr;
+ if (InlinedAt)
+ Scope = LScopes.findInlinedScope(DIVar->getScope(), InlinedAt);
+ else
+ Scope = LScopes.findLexicalScope(DIVar->getScope());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ LocalVariable Var;
+ Var.DIVar = DIVar;
+
+ // Calculate the definition ranges.
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const InsnRange &Range = *I;
+ const MachineInstr *DVInst = Range.first;
+ assert(DVInst->isDebugValue() && "Invalid History entry");
+ const DIExpression *DIExpr = DVInst->getDebugExpression();
+
+ // For now, bail if there is a complex DWARF expression.
+ if (DIExpr && DIExpr->getNumElements() > 0)
+ continue;
+
+ // Bail if operand 0 is not a valid register. This means the variable is a
+ // simple constant, or is described by a complex expression.
+ // FIXME: Find a way to represent constant variables, since they are
+ // relatively common.
+ unsigned Reg =
+ DVInst->getOperand(0).isReg() ? DVInst->getOperand(0).getReg() : 0;
+ if (Reg == 0)
+ continue;
+
+ // Handle the two cases we support: indirect in memory and in register.
+ bool IsIndirect = DVInst->getOperand(1).isImm();
+ unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg());
+ {
+ LocalVarDefRange DefRange;
+ if (IsIndirect) {
+ int64_t Offset = DVInst->getOperand(1).getImm();
+ DefRange = createDefRangeMem(CVReg, Offset);
+ } else {
+ DefRange = createDefRangeReg(CVReg);
+ }
+ if (Var.DefRanges.empty() ||
+ Var.DefRanges.back().isDifferentLocation(DefRange)) {
+ Var.DefRanges.emplace_back(std::move(DefRange));
+ }
+ }
+
+ // Compute the label range.
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ if (!End) {
+ if (std::next(I) != E)
+ End = getLabelBeforeInsn(std::next(I)->first);
+ else
+ End = Asm->getFunctionEnd();
+ }
+
+ // If the last range end is our begin, just extend the last range.
+ // Otherwise make a new range.
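+ // e.g. ranges [B0,E0] and [E0,E1] are merged into the single range [B0,E1].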
+ SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &Ranges =
+ Var.DefRanges.back().Ranges;
+ if (!Ranges.empty() && Ranges.back().second == Begin)
+ Ranges.back().second = End;
+ else
+ Ranges.emplace_back(Begin, End);
+
+ // FIXME: Do more range combining.
+ }
+
+ recordLocalVariable(std::move(Var), InlinedAt);
+ }
+}
+
+void CodeViewDebug::beginFunction(const MachineFunction *MF) {
+ assert(!CurFn && "Can't process two functions at once!");
+
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ DebugHandlerBase::beginFunction(MF);
+
+ const Function *GV = MF->getFunction();
+ assert(FnDebugInfo.count(GV) == false);
+ CurFn = &FnDebugInfo[GV];
+ CurFn->FuncId = NextFuncId++;
+ CurFn->Begin = Asm->getFunctionBegin();
+
+ // Find the end of the function prolog. The first known non-DBG_VALUE and
+ // non-frame-setup location marks the beginning of the function body.
+ // FIXME: is there a simpler way to do this? Can we just search
+ // for the first instruction of the function, not the last of the prolog?
+ DebugLoc PrologEndLoc;
+ bool EmptyPrologue = true;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ MI.getDebugLoc()) {
+ PrologEndLoc = MI.getDebugLoc();
+ break;
+ } else if (!MI.isDebugValue()) {
+ EmptyPrologue = false;
+ }
+ }
+ }
+
+ // Record beginning of function if we have a non-empty prologue.
+ if (PrologEndLoc && !EmptyPrologue) {
+ DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
+ maybeRecordLocation(FnStartDL, MF);
+ }
+}
+
+void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
+ // Don't record empty UDTs.
+ if (Ty->getName().empty())
+ return;
+
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
+ Ty->getScope().resolve(), QualifiedNameComponents);
+
+ std::string FullyQualifiedName =
+ getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
+
+ if (ClosestSubprogram == nullptr)
+ GlobalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+ else if (ClosestSubprogram == CurrentSubprogram)
+ LocalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+
+ // TODO: What if the ClosestSubprogram is neither null nor the current
+ // subprogram? Currently, the UDT just gets dropped on the floor.
+ //
+ // The current behavior is not desirable. To get maximal fidelity, we would
+ // need to perform all type translation before beginning emission of .debug$S
+ // and then make LocalUDTs a member of FunctionInfo.
+}
+
+TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
+ // Generic dispatch for lowering an unknown type.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_array_type:
+ return lowerTypeArray(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_typedef:
+ return lowerTypeAlias(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_base_type:
+ return lowerTypeBasic(cast<DIBasicType>(Ty));
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ return lowerTypePointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_ptr_to_member_type:
+ return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ return lowerTypeModifier(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_subroutine_type:
+ if (ClassTy) {
+ // The member function type of a member function pointer has no
+ // ThisAdjustment.
+ return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy,
+ /*ThisAdjustment=*/0);
+ }
+ return lowerTypeFunction(cast<DISubroutineType>(Ty));
+ case dwarf::DW_TAG_enumeration_type:
+ return lowerTypeEnum(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ return lowerTypeClass(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_union_type:
+ return lowerTypeUnion(cast<DICompositeType>(Ty));
+ default:
+ // Use the null type index.
+ return TypeIndex();
+ }
+}
+
+TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
+ DITypeRef UnderlyingTypeRef = Ty->getBaseType();
+ TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
+ StringRef TypeName = Ty->getName();
+
+ addToUDTs(Ty, UnderlyingTypeIndex);
+
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) &&
+ TypeName == "HRESULT")
+ return TypeIndex(SimpleTypeKind::HResult);
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::UInt16Short) &&
+ TypeName == "wchar_t")
+ return TypeIndex(SimpleTypeKind::WideCharacter);
+
+ return UnderlyingTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
+ DITypeRef ElementTypeRef = Ty->getBaseType();
+ TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = Asm->MAI->getPointerSize() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
+
+ bool UndefinedSubrange = false;
+
+ // FIXME:
+ // There is a bug in the front-end where an array of a structure, which was
+ // first declared as an incomplete structure, ends up not getting a size
+ // assigned to it (PR28303).
+ // Example:
+ // struct A(*p)[3];
+ // struct A { int f; } a[3];
+ //
+ // This needs to be fixed in the front-end, but in the meantime we don't want
+ // to trigger an assertion because of this.
+ if (Ty->getSizeInBits() == 0) {
+ UndefinedSubrange = true;
+ }
+
+ // Add subranges to array type.
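+ // For example, 'int a[2][3]' (with a 4-byte int) first produces a 12-byte
+ // ArrayRecord for the inner [3] dimension and then a 24-byte ArrayRecord
+ // wrapping it for the full [2][3] array.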
+ DINodeArray Elements = Ty->getElements();
+ for (int i = Elements.size() - 1; i >= 0; --i) {
+ const DINode *Element = Elements[i];
+ assert(Element->getTag() == dwarf::DW_TAG_subrange_type);
+
+ const DISubrange *Subrange = cast<DISubrange>(Element);
+ assert(Subrange->getLowerBound() == 0 &&
+ "codeview doesn't support subranges with lower bounds");
+ int64_t Count = Subrange->getCount();
+
+ // A variable-length array (VLA) has Count equal to -1.
+ // Replace it with Count 1, assuming that is the minimum VLA length.
+ // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
+ if (Count == -1) {
+ Count = 1;
+ UndefinedSubrange = true;
+ }
+
+ StringRef Name = (i == 0) ? Ty->getName() : "";
+ // Update the element size and element type index for subsequent subranges.
+ ElementSize *= Count;
+ ElementTypeIndex = TypeTable.writeArray(
+ ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name));
+ }
+
+ (void)UndefinedSubrange;
+ assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8));
+
+ return ElementTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
+ TypeIndex Index;
+ dwarf::TypeKind Kind;
+ uint32_t ByteSize;
+
+ Kind = static_cast<dwarf::TypeKind>(Ty->getEncoding());
+ ByteSize = Ty->getSizeInBits() / 8;
+
+ SimpleTypeKind STK = SimpleTypeKind::None;
+ switch (Kind) {
+ case dwarf::DW_ATE_address:
+ // FIXME: Translate
+ break;
+ case dwarf::DW_ATE_boolean:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Boolean8; break;
+ case 2: STK = SimpleTypeKind::Boolean16; break;
+ case 4: STK = SimpleTypeKind::Boolean32; break;
+ case 8: STK = SimpleTypeKind::Boolean64; break;
+ case 16: STK = SimpleTypeKind::Boolean128; break;
+ }
+ break;
+ case dwarf::DW_ATE_complex_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Complex16; break;
+ case 4: STK = SimpleTypeKind::Complex32; break;
+ case 8: STK = SimpleTypeKind::Complex64; break;
+ case 10: STK = SimpleTypeKind::Complex80; break;
+ case 16: STK = SimpleTypeKind::Complex128; break;
+ }
+ break;
+ case dwarf::DW_ATE_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Float16; break;
+ case 4: STK = SimpleTypeKind::Float32; break;
+ case 6: STK = SimpleTypeKind::Float48; break;
+ case 8: STK = SimpleTypeKind::Float64; break;
+ case 10: STK = SimpleTypeKind::Float80; break;
+ case 16: STK = SimpleTypeKind::Float128; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::SByte; break;
+ case 2: STK = SimpleTypeKind::Int16Short; break;
+ case 4: STK = SimpleTypeKind::Int32; break;
+ case 8: STK = SimpleTypeKind::Int64Quad; break;
+ case 16: STK = SimpleTypeKind::Int128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_unsigned:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Byte; break;
+ case 2: STK = SimpleTypeKind::UInt16Short; break;
+ case 4: STK = SimpleTypeKind::UInt32; break;
+ case 8: STK = SimpleTypeKind::UInt64Quad; break;
+ case 16: STK = SimpleTypeKind::UInt128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_UTF:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Character16; break;
+ case 4: STK = SimpleTypeKind::Character32; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::SignedCharacter;
+ break;
+ case dwarf::DW_ATE_unsigned_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::UnsignedCharacter;
+ break;
+ default:
+ break;
+ }
+
+ // Apply some fixups based on the source-level type name.
+ if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+ STK = SimpleTypeKind::Int32Long;
+ if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+ STK = SimpleTypeKind::UInt32Long;
+ if (STK == SimpleTypeKind::UInt16Short &&
+ (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
+ STK = SimpleTypeKind::WideCharacter;
+ if ((STK == SimpleTypeKind::SignedCharacter ||
+ STK == SimpleTypeKind::UnsignedCharacter) &&
+ Ty->getName() == "char")
+ STK = SimpleTypeKind::NarrowCharacter;
+
+ return TypeIndex(STK);
+}
+
+TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
+
+ // While processing the type being pointed to, it is possible we already
+ // created this pointer type. If so, we check here and return the existing
+ // pointer type.
+ auto I = TypeIndices.find({Ty, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Pointers to simple types can use SimpleTypeMode, rather than having a
+ // dedicated pointer type record.
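+ // e.g. 'int *' on a 64-bit target is encoded as
+ // TypeIndex(SimpleTypeKind::Int32, SimpleTypeMode::NearPointer64) rather
+ // than as a separate PointerRecord.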
+ if (PointeeTI.isSimple() &&
+ PointeeTI.getSimpleMode() == SimpleTypeMode::Direct &&
+ Ty->getTag() == dwarf::DW_TAG_pointer_type) {
+ SimpleTypeMode Mode = Ty->getSizeInBits() == 64
+ ? SimpleTypeMode::NearPointer64
+ : SimpleTypeMode::NearPointer32;
+ return TypeIndex(PointeeTI.getSimpleKind(), Mode);
+ }
+
+ PointerKind PK =
+ Ty->getSizeInBits() == 64 ? PointerKind::Near64 : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ switch (Ty->getTag()) {
+ default: llvm_unreachable("not a pointer tag type");
+ case dwarf::DW_TAG_pointer_type:
+ PM = PointerMode::Pointer;
+ break;
+ case dwarf::DW_TAG_reference_type:
+ PM = PointerMode::LValueReference;
+ break;
+ case dwarf::DW_TAG_rvalue_reference_type:
+ PM = PointerMode::RValueReference;
+ break;
+ }
+ // FIXME: MSVC folds qualifiers into PointerOptions in the context of a method
+ // 'this' pointer, but not in normal contexts. Figure out what we're supposed to
+ // do.
+ PointerOptions PO = PointerOptions::None;
+ PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
+ return TypeTable.writePointer(PR);
+}
+
+static PointerToMemberRepresentation
+translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {
+ // SizeInBytes being zero generally implies that the member pointer type was
+ // incomplete, which can happen if it is part of a function prototype. In this
+ // case, use the unknown model instead of the general model.
+ if (IsPMF) {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralFunction;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceFunction;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceFunction;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceFunction;
+ }
+ } else {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralData;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceData;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceData;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceData;
+ }
+ }
+ llvm_unreachable("invalid ptr to member representation");
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
+ assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
+ PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
+ PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
+ : PointerMode::PointerToDataMember;
+ PointerOptions PO = PointerOptions::None; // FIXME
+ assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big");
+ uint8_t SizeInBytes = Ty->getSizeInBits() / 8;
+ MemberPointerInfo MPI(
+ ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
+ PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
+ return TypeTable.writePointer(PR);
+}
+
+/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
+/// have a translation, use the NearC convention.
+static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) {
+ switch (DwarfCC) {
+ case dwarf::DW_CC_normal: return CallingConvention::NearC;
+ case dwarf::DW_CC_BORLAND_msfastcall: return CallingConvention::NearFast;
+ case dwarf::DW_CC_BORLAND_thiscall: return CallingConvention::ThisCall;
+ case dwarf::DW_CC_BORLAND_stdcall: return CallingConvention::NearStdCall;
+ case dwarf::DW_CC_BORLAND_pascal: return CallingConvention::NearPascal;
+ case dwarf::DW_CC_LLVM_vectorcall: return CallingConvention::NearVector;
+ }
+ return CallingConvention::NearC;
+}
+
+TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
+ ModifierOptions Mods = ModifierOptions::None;
+ bool IsModifier = true;
+ const DIType *BaseTy = Ty;
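+ // Fold a chain of const/volatile nodes into a single modifier record, e.g.
+ // 'const volatile int' becomes one ModifierRecord(Const | Volatile) wrapping
+ // the type index for 'int'.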
+ while (IsModifier && BaseTy) {
+ // FIXME: Need to add DWARF tag for __unaligned.
+ switch (BaseTy->getTag()) {
+ case dwarf::DW_TAG_const_type:
+ Mods |= ModifierOptions::Const;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Mods |= ModifierOptions::Volatile;
+ break;
+ default:
+ IsModifier = false;
+ break;
+ }
+ if (IsModifier)
+ BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
+ }
+ TypeIndex ModifiedTI = getTypeIndex(BaseTy);
+
+ // While processing the type being modified, it is possible we already
+ // created this modifier type. If so, we check here and return the existing
+ // modifier type.
+ auto I = TypeIndices.find({Ty, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ ModifierRecord MR(ModifiedTI, Mods);
+ return TypeTable.writeModifier(MR);
+}
+
+TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
+ SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
+ for (DITypeRef ArgTypeRef : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ ArrayRef<TypeIndex> ArgTypeIndices = None;
+ if (!ReturnAndArgTypeIndices.empty()) {
+ auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
+ ReturnTypeIndex = ReturnAndArgTypesRef.front();
+ ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
+ }
+
+ ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
+ TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+
+ CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
+
+ ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
+ ArgTypeIndices.size(), ArgListIndex);
+ return TypeTable.writeProcedure(Procedure);
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
+ const DIType *ClassTy,
+ int ThisAdjustment) {
+ // Lower the containing class type.
+ TypeIndex ClassType = getTypeIndex(ClassTy);
+
+ SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
+ for (DITypeRef ArgTypeRef : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ ArrayRef<TypeIndex> ArgTypeIndices = None;
+ if (!ReturnAndArgTypeIndices.empty()) {
+ auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices);
+ ReturnTypeIndex = ReturnAndArgTypesRef.front();
+ ArgTypeIndices = ReturnAndArgTypesRef.drop_front();
+ }
+ TypeIndex ThisTypeIndex = TypeIndex::Void();
+ if (!ArgTypeIndices.empty()) {
+ ThisTypeIndex = ArgTypeIndices.front();
+ ArgTypeIndices = ArgTypeIndices.drop_front();
+ }
+
+ ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
+ TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+
+ CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
+
+ // TODO: Need to use the correct values for:
+ // FunctionOptions
+ // ThisPointerAdjustment.
+ TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord(
+ ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None,
+ ArgTypeIndices.size(), ArgListIndex, ThisAdjustment));
+
+ return TI;
+}
+
+static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
+ switch (Flags & DINode::FlagAccessibility) {
+ case DINode::FlagPrivate: return MemberAccess::Private;
+ case DINode::FlagPublic: return MemberAccess::Public;
+ case DINode::FlagProtected: return MemberAccess::Protected;
+ case 0:
+ // If there was no explicit access control, provide the default for the tag.
+ return RecordTag == dwarf::DW_TAG_class_type ? MemberAccess::Private
+ : MemberAccess::Public;
+ }
+ llvm_unreachable("access flags are exclusive");
+}
+
+static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) {
+ if (SP->isArtificial())
+ return MethodOptions::CompilerGenerated;
+
+ // FIXME: Handle other MethodOptions.
+
+ return MethodOptions::None;
+}
+
+static MethodKind translateMethodKindFlags(const DISubprogram *SP,
+ bool Introduced) {
+ switch (SP->getVirtuality()) {
+ case dwarf::DW_VIRTUALITY_none:
+ break;
+ case dwarf::DW_VIRTUALITY_virtual:
+ return Introduced ? MethodKind::IntroducingVirtual : MethodKind::Virtual;
+ case dwarf::DW_VIRTUALITY_pure_virtual:
+ return Introduced ? MethodKind::PureIntroducingVirtual
+ : MethodKind::PureVirtual;
+ default:
+ llvm_unreachable("unhandled virtuality case");
+ }
+
+ // FIXME: Get Clang to mark DISubprogram as static and do something with it.
+
+ return MethodKind::Vanilla;
+}
+
+static TypeRecordKind getRecordKind(const DICompositeType *Ty) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type: return TypeRecordKind::Class;
+ case dwarf::DW_TAG_structure_type: return TypeRecordKind::Struct;
+ }
+ llvm_unreachable("unexpected tag");
+}
+
+/// Return ClassOptions that should be present on both the forward declaration
+/// and the definition of a tag type.
+static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::None;
+
+ // MSVC always sets this flag, even for local types. Clang doesn't always
+ // appear to give every type a linkage name, which may be problematic for us.
+ // FIXME: Investigate the consequences of not following them here.
+ if (!Ty->getIdentifier().empty())
+ CO |= ClassOptions::HasUniqueName;
+
+ // Put the Nested flag on a type if it appears immediately inside a tag type.
+ // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
+ // here. That flag is only set on definitions, and not forward declarations.
+ const DIScope *ImmediateScope = Ty->getScope().resolve();
+ if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
+ CO |= ClassOptions::Nested;
+
+ // Put the Scoped flag on function-local types.
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope().resolve()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
+ }
+
+ return CO;
+}
+
+TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FTI;
+ unsigned EnumeratorCount = 0;
+
+ if (Ty->isForwardDecl()) {
+ CO |= ClassOptions::ForwardReference;
+ } else {
+ FieldListRecordBuilder Fields;
+ for (const DINode *Element : Ty->getElements()) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
+ Fields.writeEnumerator(EnumeratorRecord(
+ MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()),
+ Enumerator->getName()));
+ EnumeratorCount++;
+ }
+ }
+ FTI = TypeTable.writeFieldList(Fields);
+ }
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName,
+ Ty->getIdentifier(),
+ getTypeIndex(Ty->getBaseType())));
+}
+
+//===----------------------------------------------------------------------===//
+// ClassInfo
+//===----------------------------------------------------------------------===//
+
+struct llvm::ClassInfo {
+ struct MemberInfo {
+ const DIDerivedType *MemberTypeNode;
+ uint64_t BaseOffset;
+ };
+ // [MemberInfo]
+ typedef std::vector<MemberInfo> MemberList;
+
+ typedef TinyPtrVector<const DISubprogram *> MethodsList;
+ // MethodName -> MethodsList
+ typedef MapVector<MDString *, MethodsList> MethodsMap;
+
+ /// Base classes.
+ std::vector<const DIDerivedType *> Inheritance;
+
+ /// Direct members.
+ MemberList Members;
+ // Direct overloaded methods gathered by name.
+ MethodsMap Methods;
+
+ std::vector<const DICompositeType *> NestedClasses;
+};
+
+void CodeViewDebug::clear() {
+ assert(CurFn == nullptr);
+ FileIdMap.clear();
+ FnDebugInfo.clear();
+ FileToFilepathMap.clear();
+ LocalUDTs.clear();
+ GlobalUDTs.clear();
+ TypeIndices.clear();
+ CompleteTypeIndices.clear();
+}
+
+void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
+ const DIDerivedType *DDTy) {
+ if (!DDTy->getName().empty()) {
+ Info.Members.push_back({DDTy, 0});
+ return;
+ }
+ // An unnamed member must represent a nested struct or union. Add all the
+ // indirect fields to the current record.
+ assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
+ uint64_t Offset = DDTy->getOffsetInBits();
+ const DIType *Ty = DDTy->getBaseType().resolve();
+ const DICompositeType *DCTy = cast<DICompositeType>(Ty);
+ ClassInfo NestedInfo = collectClassInfo(DCTy);
+ for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members)
+ Info.Members.push_back(
+ {IndirectField.MemberTypeNode, IndirectField.BaseOffset + Offset});
+}
+
+ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
+ ClassInfo Info;
+ // Add elements to structure type.
+ DINodeArray Elements = Ty->getElements();
+ for (auto *Element : Elements) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<DISubprogram>(Element)) {
+ Info.Methods[SP->getRawName()].push_back(SP);
+ } else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_member) {
+ collectMemberInfo(Info, DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) {
+ Info.Inheritance.push_back(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
+ // Ignore friend members. It appears that MSVC emitted info about
+ // friends in the past, but modern versions do not.
+ }
+ // FIXME: Get Clang to emit function virtual table here and handle it.
+ } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
+ Info.NestedClasses.push_back(Composite);
+ }
+ // Skip other unrecognized kinds of elements.
+ }
+ return Info;
+}
+
+TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
+ // First, construct the forward decl. Don't look into Ty to compute the
+ // forward decl options, since it might not be available in all TUs.
+ TypeRecordKind Kind = getRecordKind(Ty);
+ ClassOptions CO =
+ ClassOptions::ForwardReference | getCommonClassOptions(Ty);
+ std::string FullName = getFullyQualifiedName(Ty);
+ TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord(
+ Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(),
+ TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier()));
+ if (!Ty->isForwardDecl())
+ DeferredCompleteTypes.push_back(Ty);
+ return FwdDeclTI;
+}
+
+TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
+ // Construct the field list and complete type record.
+ TypeRecordKind Kind = getRecordKind(Ty);
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FieldTI;
+ TypeIndex VShapeTI;
+ unsigned FieldCount;
+ bool ContainsNestedClass;
+ std::tie(FieldTI, VShapeTI, FieldCount, ContainsNestedClass) =
+ lowerRecordFieldList(Ty);
+
+ if (ContainsNestedClass)
+ CO |= ClassOptions::ContainsNestedClass;
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
+
+ TypeIndex ClassTI = TypeTable.writeClass(ClassRecord(
+ Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI,
+ TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier()));
+
+ TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
+ ClassTI, TypeTable.writeStringId(StringIdRecord(
+ TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
+ Ty->getLine()));
+
+ addToUDTs(Ty, ClassTI);
+
+ return ClassTI;
+}
+
+TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
+ ClassOptions CO =
+ ClassOptions::ForwardReference | getCommonClassOptions(Ty);
+ std::string FullName = getFullyQualifiedName(Ty);
+ TypeIndex FwdDeclTI =
+ TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0,
+ FullName, Ty->getIdentifier()));
+ if (!Ty->isForwardDecl())
+ DeferredCompleteTypes.push_back(Ty);
+ return FwdDeclTI;
+}
+
+TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::Sealed | getCommonClassOptions(Ty);
+ TypeIndex FieldTI;
+ unsigned FieldCount;
+ bool ContainsNestedClass;
+ std::tie(FieldTI, std::ignore, FieldCount, ContainsNestedClass) =
+ lowerRecordFieldList(Ty);
+
+ if (ContainsNestedClass)
+ CO |= ClassOptions::ContainsNestedClass;
+
+ uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ TypeIndex UnionTI = TypeTable.writeUnion(
+ UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName,
+ Ty->getIdentifier()));
+
+ TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
+ UnionTI, TypeTable.writeStringId(StringIdRecord(
+ TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
+ Ty->getLine()));
+
+ addToUDTs(Ty, UnionTI);
+
+ return UnionTI;
+}
+
+std::tuple<TypeIndex, TypeIndex, unsigned, bool>
+CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
+ // Manually count members. MSVC appears to count everything that generates a
+ // field list record. Each individual overload in a method overload group
+ // contributes to this count, even though the overload group is a single field
+ // list record.
+ unsigned MemberCount = 0;
+ ClassInfo Info = collectClassInfo(Ty);
+ FieldListRecordBuilder Fields;
+
+ // Create base classes.
+ for (const DIDerivedType *I : Info.Inheritance) {
+ if (I->getFlags() & DINode::FlagVirtual) {
+ // Virtual base.
+ // FIXME: Emit VBPtrOffset when the frontend provides it.
+ unsigned VBPtrOffset = 0;
+ // FIXME: Despite the accessor name, the offset is really in bytes.
+ unsigned VBTableIndex = I->getOffsetInBits() / 4;
+ Fields.writeVirtualBaseClass(VirtualBaseClassRecord(
+ translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
+ VBTableIndex));
+ } else {
+ assert(I->getOffsetInBits() % 8 == 0 &&
+ "bases must be on byte boundaries");
+ Fields.writeBaseClass(BaseClassRecord(
+ translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8));
+ }
+ }
+
+ // Create members.
+ for (ClassInfo::MemberInfo &MemberInfo : Info.Members) {
+ const DIDerivedType *Member = MemberInfo.MemberTypeNode;
+ TypeIndex MemberBaseType = getTypeIndex(Member->getBaseType());
+ StringRef MemberName = Member->getName();
+ MemberAccess Access =
+ translateAccessFlags(Ty->getTag(), Member->getFlags());
+
+ if (Member->isStaticMember()) {
+ Fields.writeStaticDataMember(
+ StaticDataMemberRecord(Access, MemberBaseType, MemberName));
+ MemberCount++;
+ continue;
+ }
+
+ // Data member.
+ uint64_t MemberOffsetInBits =
+ Member->getOffsetInBits() + MemberInfo.BaseOffset;
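+ // For bitfields, getOffsetInBits() is the offset of the field itself, while
+ // the extra storage offset (when present) is the offset of the allocation
+ // unit holding it; the difference below is the start bit within that unit,
+ // which is what the LF_BITFIELD record carries.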
+ if (Member->isBitField()) {
+ uint64_t StartBitOffset = MemberOffsetInBits;
+ if (const auto *CI =
+ dyn_cast_or_null<ConstantInt>(Member->getStorageOffsetInBits())) {
+ MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset;
+ }
+ StartBitOffset -= MemberOffsetInBits;
+ MemberBaseType = TypeTable.writeBitField(BitFieldRecord(
+ MemberBaseType, Member->getSizeInBits(), StartBitOffset));
+ }
+ uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
+ Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType,
+ MemberOffsetInBytes, MemberName));
+ MemberCount++;
+ }
+
+ // Create methods
+ for (auto &MethodItr : Info.Methods) {
+ StringRef Name = MethodItr.first->getString();
+
+ std::vector<OneMethodRecord> Methods;
+ for (const DISubprogram *SP : MethodItr.second) {
+ TypeIndex MethodType = getMemberFunctionType(SP, Ty);
+ bool Introduced = SP->getFlags() & DINode::FlagIntroducedVirtual;
+
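+ // Only introduced virtual methods carry a vftable offset (in bytes from the
+ // start of the vftable); -1 serves as the "no offset" placeholder for all
+ // other methods.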
+ unsigned VFTableOffset = -1;
+ if (Introduced)
+ VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes();
+
+ Methods.push_back(
+ OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced),
+ translateMethodOptionFlags(SP),
+ translateAccessFlags(Ty->getTag(), SP->getFlags()),
+ VFTableOffset, Name));
+ MemberCount++;
+ }
+ assert(Methods.size() > 0 && "Empty methods map entry");
+ if (Methods.size() == 1)
+ Fields.writeOneMethod(Methods[0]);
+ else {
+ TypeIndex MethodList =
+ TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods));
+ Fields.writeOverloadedMethod(
+ OverloadedMethodRecord(Methods.size(), MethodList, Name));
+ }
+ }
+
+ // Create nested classes.
+ for (const DICompositeType *Nested : Info.NestedClasses) {
+ NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
+ Fields.writeNestedType(R);
+ MemberCount++;
+ }
+
+ TypeIndex FieldTI = TypeTable.writeFieldList(Fields);
+ return std::make_tuple(FieldTI, TypeIndex(), MemberCount,
+ !Info.NestedClasses.empty());
+}
+
+TypeIndex CodeViewDebug::getVBPTypeIndex() {
+ if (!VBPType.getIndex()) {
+ // Make a 'const int *' type.
+ ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
+ TypeIndex ModifiedTI = TypeTable.writeModifier(MR);
+
+ PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ PointerOptions PO = PointerOptions::None;
+ PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
+
+ VBPType = TypeTable.writePointer(PR);
+ }
+
+ return VBPType;
+}
+
+TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
+ const DIType *Ty = TypeRef.resolve();
+ const DIType *ClassTy = ClassTyRef.resolve();
+
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // Check if we've already translated this type. Don't try to do a
+ // get-or-create style insertion that caches the hash lookup across the
+ // lowerType call, since lowering the type may itself update the TypeIndices
+ // map.
+ auto I = TypeIndices.find({Ty, ClassTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerType(Ty, ClassTy);
+ return recordTypeIndexForDINode(Ty, TI, ClassTy);
+}
+
+TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
+ const DIType *Ty = TypeRef.resolve();
+
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // If this is a non-record type, the complete type index is the same as the
+ // normal type index. Just call getTypeIndex.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ break;
+ default:
+ return getTypeIndex(Ty);
+ }
+
+ // Check if we've already translated the complete record type. Lowering a
+ // complete type should never trigger lowering another complete type, so we
+ // can reuse the hash table lookup result.
+ const auto *CTy = cast<DICompositeType>(Ty);
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+ if (!InsertResult.second)
+ return InsertResult.first->second;
+
+ TypeLoweringScope S(*this);
+
+ // Make sure the forward declaration is emitted first. It's unclear if this
+ // is necessary, but MSVC does it, and we should follow suit until we can show
+ // otherwise.
+ TypeIndex FwdDeclTI = getTypeIndex(CTy);
+
+ // Just use the forward decl if we don't have complete type info. This might
+ // happen if the frontend is using modules and expects the complete definition
+ // to be emitted elsewhere.
+ if (CTy->isForwardDecl())
+ return FwdDeclTI;
+
+ TypeIndex TI;
+ switch (CTy->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ TI = lowerCompleteTypeClass(CTy);
+ break;
+ case dwarf::DW_TAG_union_type:
+ TI = lowerCompleteTypeUnion(CTy);
+ break;
+ default:
+ llvm_unreachable("not a record");
+ }
+
+ InsertResult.first->second = TI;
+ return TI;
+}
+
+/// Emit all the deferred complete record types. Try to do this in FIFO order,
+/// and do this until fixpoint, as each complete record type typically
+/// references many other record types.
+void CodeViewDebug::emitDeferredCompleteTypes() {
+ SmallVector<const DICompositeType *, 4> TypesToEmit;
+ while (!DeferredCompleteTypes.empty()) {
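+ // Swap out the current batch so that any types deferred while lowering it
+ // land back in DeferredCompleteTypes and are drained on the next pass.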
+ std::swap(DeferredCompleteTypes, TypesToEmit);
+ for (const DICompositeType *RecordTy : TypesToEmit)
+ getCompleteTypeIndex(RecordTy);
+ TypesToEmit.clear();
+ }
+}
+
+void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
+ // Get the sorted list of parameters and emit them first.
+ SmallVector<const LocalVariable *, 6> Params;
+ for (const LocalVariable &L : Locals)
+ if (L.DIVar->isParameter())
+ Params.push_back(&L);
+ std::sort(Params.begin(), Params.end(),
+ [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
+ for (const LocalVariable *L : Params)
+ emitLocalVariable(*L);
+
+ // Next emit all non-parameters in the order that we found them.
+ for (const LocalVariable &L : Locals)
+ if (!L.DIVar->isParameter())
+ emitLocalVariable(L);
+}
+
+void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
+ MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(),
+ *LocalEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2);
+ OS.EmitLabel(LocalBegin);
+
+ OS.AddComment("Record kind: S_LOCAL");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2);
+
+ LocalSymFlags Flags = LocalSymFlags::None;
+ if (Var.DIVar->isParameter())
+ Flags |= LocalSymFlags::IsParameter;
+ if (Var.DefRanges.empty())
+ Flags |= LocalSymFlags::IsOptimizedOut;
+
+ OS.AddComment("TypeIndex");
+ TypeIndex TI = getCompleteTypeIndex(Var.DIVar->getType());
+ OS.EmitIntValue(TI.getIndex(), 4);
+ OS.AddComment("Flags");
+ OS.EmitIntValue(static_cast<uint16_t>(Flags), 2);
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, Var.DIVar->getName());
+ OS.EmitLabel(LocalEnd);
+
+ // Calculate the on-disk prefix of the appropriate def range record. The
+ // records and on-disk formats are described in SymbolRecord.h. BytePrefix
+ // should be big enough to hold all forms without memory allocation.
+ SmallString<20> BytePrefix;
+ for (const LocalVarDefRange &DefRange : Var.DefRanges) {
+ BytePrefix.clear();
+ // FIXME: Handle bitpieces.
+ if (DefRange.StructOffset != 0)
+ continue;
+
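+ // Two layouts are used: S_DEFRANGE_REGISTER_REL when the variable lives in
+ // memory at [CVRegister + DataOffset], and S_DEFRANGE_REGISTER when it lives
+ // directly in CVRegister. In both cases only the fixed-size header (minus
+ // the trailing LocalVariableAddrRange) goes into BytePrefix; the address
+ // ranges themselves are supplied by the .cv_def_range directive below.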
+ if (DefRange.InMemory) {
+ DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0,
+ 0, 0, ArrayRef<LocalVariableAddrGap>());
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&Sym.Header),
+ sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ } else {
+ assert(DefRange.DataOffset == 0 && "unexpected offset into register");
+ // Unclear what matters here.
+ DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0,
+ ArrayRef<LocalVariableAddrGap>());
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
+ BytePrefix +=
+ StringRef(reinterpret_cast<const char *>(&Sym.Header),
+ sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ }
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
+ }
+}
+
+void CodeViewDebug::endFunction(const MachineFunction *MF) {
+ if (!Asm || !CurFn) // We haven't created any debug info for this function.
+ return;
+
+ const Function *GV = MF->getFunction();
+ assert(FnDebugInfo.count(GV));
+ assert(CurFn == &FnDebugInfo[GV]);
+
+ collectVariableInfo(GV->getSubprogram());
+
+ DebugHandlerBase::endFunction(MF);
+
+ // Don't emit anything if we don't have any line tables.
+ if (!CurFn->HaveLineInfo) {
+ FnDebugInfo.erase(GV);
+ CurFn = nullptr;
+ return;
+ }
+
+ CurFn->End = Asm->getFunctionEnd();
+
+ CurFn = nullptr;
+}
+
+void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+
+ // Ignore DBG_VALUE locations and function prologue.
+ if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
+ return;
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL == PrevInstLoc || !DL)
+ return;
+ maybeRecordLocation(DL, Asm->MF);
+}
+
+MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) {
+ MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(),
+ *EndLabel = MMI->getContext().createTempSymbol();
+ OS.EmitIntValue(unsigned(Kind), 4);
+ OS.AddComment("Subsection size");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4);
+ OS.EmitLabel(BeginLabel);
+ return EndLabel;
+}
+
+void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) {
+ OS.EmitLabel(EndLabel);
+ // Every subsection must be aligned to a 4-byte boundary.
+ OS.EmitValueToAlignment(4);
+}
+
+void CodeViewDebug::emitDebugInfoForUDTs(
+ ArrayRef<std::pair<std::string, TypeIndex>> UDTs) {
+ for (const std::pair<std::string, codeview::TypeIndex> &UDT : UDTs) {
+ MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(),
+ *UDTRecordEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2);
+ OS.EmitLabel(UDTRecordBegin);
+
+ OS.AddComment("Record kind: S_UDT");
+ OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2);
+
+ OS.AddComment("Type");
+ OS.EmitIntValue(UDT.second.getIndex(), 4);
+
+ emitNullTerminatedSymbolName(OS, UDT.first);
+ OS.EmitLabel(UDTRecordEnd);
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobals() {
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ for (const MDNode *Node : CUs->operands()) {
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ // First, emit all globals that are not in a comdat in a single symbol
+ // substream. MSVC doesn't like it if the substream is empty, so only open
+ // it if we have at least one global to emit.
+ switchToDebugSectionForSymbol(nullptr);
+ MCSymbol *EndLabel = nullptr;
+ for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
+ if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
+ if (!EndLabel) {
+ OS.AddComment("Symbol subsection for globals");
+ EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ }
+ emitDebugInfoForGlobal(G, Asm->getSymbol(GV));
+ }
+ }
+ }
+ if (EndLabel)
+ endCVSubsection(EndLabel);
+
+ // Second, emit each global that is in a comdat into its own .debug$S
+ // section along with its own symbol substream.
+ for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
+ if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ if (GV->hasComdat()) {
+ MCSymbol *GVSym = Asm->getSymbol(GV);
+ OS.AddComment("Symbol subsection for " +
+ Twine(GlobalValue::getRealLinkageName(GV->getName())));
+ switchToDebugSectionForSymbol(GVSym);
+ EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitDebugInfoForGlobal(G, GVSym);
+ endCVSubsection(EndLabel);
+ }
+ }
+ }
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForRetainedTypes() {
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ for (const MDNode *Node : CUs->operands()) {
+ for (auto *Ty : cast<DICompileUnit>(Node)->getRetainedTypes()) {
+ if (DIType *RT = dyn_cast<DIType>(Ty)) {
+ getTypeIndex(RT);
+ // FIXME: Add to global/local DTU list.
+ }
+ }
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
+ MCSymbol *GVSym) {
+ // DataSym record, see SymbolRecord.h for more info.
+ // FIXME: Thread local data, etc
+ MCSymbol *DataBegin = MMI->getContext().createTempSymbol(),
+ *DataEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
+ OS.EmitLabel(DataBegin);
+ const auto *GV = cast<GlobalVariable>(DIGV->getVariable());
+ if (DIGV->isLocalToUnit()) {
+ if (GV->isThreadLocal()) {
+ OS.AddComment("Record kind: S_LTHREAD32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2);
+ } else {
+ OS.AddComment("Record kind: S_LDATA32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2);
+ }
+ } else {
+ if (GV->isThreadLocal()) {
+ OS.AddComment("Record kind: S_GTHREAD32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2);
+ } else {
+ OS.AddComment("Record kind: S_GDATA32");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2);
+ }
+ }
+ OS.AddComment("Type");
+ OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.AddComment("DataOffset");
+ OS.EmitCOFFSecRel32(GVSym);
+ OS.AddComment("Segment");
+ OS.EmitCOFFSectionIndex(GVSym);
+ OS.AddComment("Name");
+ emitNullTerminatedSymbolName(OS, DIGV->getName());
+ OS.EmitLabel(DataEnd);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
new file mode 100644
index 000000000000..e4bbd61d4ce0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -0,0 +1,310 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Microsoft CodeView debug info.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+
+#include "DebugHandlerBase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+
+class StringRef;
+class LexicalScope;
+struct ClassInfo;
+
+/// \brief Collects and handles line tables information in a CodeView format.
+class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
+ MCStreamer &OS;
+ codeview::MemoryTypeTableBuilder TypeTable;
+
+ /// Represents the most general definition range.
+ struct LocalVarDefRange {
+ /// Indicates that variable data is stored in memory relative to the
+ /// specified register.
+ int InMemory : 1;
+
+ /// Offset of variable data in memory.
+ int DataOffset : 31;
+
+ /// Offset of the data into the user level struct. If zero, no splitting
+ /// occurred.
+ uint16_t StructOffset;
+
+ /// Register containing the data or the register base of the memory
+ /// location containing the data.
+ uint16_t CVRegister;
+
+ /// Compares all location fields. This includes all fields except the label
+ /// ranges.
+ bool isDifferentLocation(LocalVarDefRange &O) {
+ return InMemory != O.InMemory || DataOffset != O.DataOffset ||
+ StructOffset != O.StructOffset || CVRegister != O.CVRegister;
+ }
+
+ SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
+ };
+
+ static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
+ static LocalVarDefRange createDefRangeReg(uint16_t CVRegister);
+
+ /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
+ struct LocalVariable {
+ const DILocalVariable *DIVar = nullptr;
+ SmallVector<LocalVarDefRange, 1> DefRanges;
+ };
+
+ struct InlineSite {
+ SmallVector<LocalVariable, 1> InlinedLocals;
+ SmallVector<const DILocation *, 1> ChildSites;
+ const DISubprogram *Inlinee = nullptr;
+
+ /// The ID of the inline site or function used with .cv_loc. Not a type
+ /// index.
+ unsigned SiteFuncId = 0;
+ };
+
+ // Debug information recorded for each function: its inline sites, local
+ // variables, and the labels delimiting its instructions.
+ struct FunctionInfo {
+ /// Map from inlined call site to inlined instructions and child inlined
+ /// call sites. Listed in program order.
+ std::unordered_map<const DILocation *, InlineSite> InlineSites;
+
+ /// Ordered list of top-level inlined call sites.
+ SmallVector<const DILocation *, 1> ChildSites;
+
+ SmallVector<LocalVariable, 1> Locals;
+
+ DebugLoc LastLoc;
+ const MCSymbol *Begin = nullptr;
+ const MCSymbol *End = nullptr;
+ unsigned FuncId = 0;
+ unsigned LastFileId = 0;
+ bool HaveLineInfo = false;
+ };
+ FunctionInfo *CurFn;
+
+ /// The set of comdat .debug$S sections that we've seen so far. Each section
+ /// must start with a magic version number that must only be emitted once.
+ /// This set tracks which sections we've already opened.
+ DenseSet<MCSectionCOFF *> ComdatDebugSections;
+
+ /// Switch to the appropriate .debug$S section for GVSym. If GVSym, the symbol
+ /// of an emitted global value, is in a comdat COFF section, this will switch
+ /// to a new .debug$S section in that comdat. This method ensures that the
+ /// section starts with the magic version number on first use. If GVSym is
+ /// null, uses the main .debug$S section.
+ void switchToDebugSectionForSymbol(const MCSymbol *GVSym);
+
+ /// The next available function index for use with our .cv_* directives. Not
+ /// to be confused with type indices for LF_FUNC_ID records.
+ unsigned NextFuncId = 0;
+
+ InlineSite &getInlineSite(const DILocation *InlinedAt,
+ const DISubprogram *Inlinee);
+
+ codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
+
+ static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
+ const FunctionInfo &FI,
+ const InlineSite &Site);
+
+ /// Remember some debug info about each function. Keep it in a stable order to
+ /// emit at the end of the TU.
+ MapVector<const Function *, FunctionInfo> FnDebugInfo;
+
+ /// Map from DIFile to .cv_file id.
+ DenseMap<const DIFile *, unsigned> FileIdMap;
+
+ /// All inlined subprograms in the order they should be emitted.
+ SmallSetVector<const DISubprogram *, 4> InlinedSubprograms;
+
+ /// Map from a pair of DI metadata nodes (and its DI type or scope, which may
+ /// be nullptr) to CodeView type indices. Primarily indexed by
+ /// {DIType*, DIType*} and {DISubprogram*, DIType*}.
+ ///
+ /// The second entry in the key is needed for methods, because the
+ /// DISubroutineType representing a static method's type is shared with the
+ /// equivalent non-method function type.
+ DenseMap<std::pair<const DINode *, const DIType *>, codeview::TypeIndex>
+ TypeIndices;
+
+ /// Map from DICompositeType* to complete type index. Non-record types are
+ /// always looked up in the normal TypeIndices map.
+ DenseMap<const DICompositeType *, codeview::TypeIndex> CompleteTypeIndices;
+
+ /// Complete record types to emit after all active type lowerings are
+ /// finished.
+ SmallVector<const DICompositeType *, 4> DeferredCompleteTypes;
+
+ /// Number of type lowering frames active on the stack.
+ unsigned TypeEmissionLevel = 0;
+
+ codeview::TypeIndex VBPType;
+
+ const DISubprogram *CurrentSubprogram = nullptr;
+
+ // The UDTs we have seen while processing types; each entry is a pair of type
+ // name and type index.
+ std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs,
+ GlobalUDTs;
+
+ typedef std::map<const DIFile *, std::string> FileToFilepathMapTy;
+ FileToFilepathMapTy FileToFilepathMap;
+ StringRef getFullFilepath(const DIFile *S);
+
+ unsigned maybeRecordFile(const DIFile *F);
+
+ void maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF);
+
+ void clear();
+
+ void setCurrentSubprogram(const DISubprogram *SP) {
+ CurrentSubprogram = SP;
+ LocalUDTs.clear();
+ }
+
+ /// Emit the magic version number at the start of a CodeView type or symbol
+ /// section. Appears at the front of every .debug$S or .debug$T section.
+ void emitCodeViewMagicVersion();
+
+ void emitTypeInformation();
+
+ void emitInlineeLinesSubsection();
+
+ void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
+
+ void emitDebugInfoForGlobals();
+
+ void emitDebugInfoForRetainedTypes();
+
+ void emitDebugInfoForUDTs(
+ ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs);
+
+ void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym);
+
+ /// Opens a subsection of the given kind in a .debug$S codeview section.
+ /// Returns an end label for use with endCVSubsection when the subsection is
+ /// finished.
+ MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind);
+
+ void endCVSubsection(MCSymbol *EndLabel);
+
+ void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt,
+ const InlineSite &Site);
+
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+
+ void collectVariableInfo(const DISubprogram *SP);
+
+ void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed);
+
+ /// Records information about a local variable in the appropriate scope. In
+ /// particular, locals from inlined code live inside the inlining site.
+ void recordLocalVariable(LocalVariable &&Var, const DILocation *Loc);
+
+ /// Emits local variables in the appropriate order.
+ void emitLocalVariableList(ArrayRef<LocalVariable> Locals);
+
+ /// Emits an S_LOCAL record and its associated defined ranges.
+ void emitLocalVariable(const LocalVariable &Var);
+
+ /// Translates the DIType to codeview if necessary and returns a type index
+ /// for it.
+ codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
+ DITypeRef ClassTyRef = DITypeRef());
+
+ codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
+ const DICompositeType *Class);
+
+ codeview::TypeIndex getScopeIndex(const DIScope *Scope);
+
+ codeview::TypeIndex getVBPTypeIndex();
+
+ void addToUDTs(const DIType *Ty, codeview::TypeIndex TI);
+
+ codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
+ codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
+ codeview::TypeIndex lowerTypePointer(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
+ codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
+ codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
+ const DIType *ClassTy,
+ int ThisAdjustment);
+ codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty);
+
+ /// Symbol records should point to complete types, but type records should
+ /// always point to incomplete types to avoid cycles in the type graph. Only
+ /// use this entry point when generating symbol records. The complete and
+ /// incomplete type indices only differ for record types. All other types use
+ /// the same index.
+ codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef);
+
+ codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty);
+ codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty);
+
+ struct TypeLoweringScope;
+
+ void emitDeferredCompleteTypes();
+
+ void collectMemberInfo(ClassInfo &Info, const DIDerivedType *DDTy);
+ ClassInfo collectClassInfo(const DICompositeType *Ty);
+
+ /// Common record member lowering functionality for record types, which are
+ /// structs, classes, and unions. Returns the field list index, a vtable
+ /// shape index, the member count, and whether the record contains a nested
+ /// class.
+ std::tuple<codeview::TypeIndex, codeview::TypeIndex, unsigned, bool>
+ lowerRecordFieldList(const DICompositeType *Ty);
+
+ /// Inserts {{Node, ClassTy}, TI} into TypeIndices and checks for duplicates.
+ codeview::TypeIndex recordTypeIndexForDINode(const DINode *Node,
+ codeview::TypeIndex TI,
+ const DIType *ClassTy = nullptr);
+
+ unsigned getPointerSizeInBytes();
+
+public:
+ CodeViewDebug(AsmPrinter *Asm);
+
+ void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
+
+ /// \brief Emit the COFF section that holds the line table information.
+ void endModule() override;
+
+ /// \brief Gather pre-function debug information.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// \brief Gather post-function debug information.
+ void endFunction(const MachineFunction *) override;
+
+ /// \brief Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
new file mode 100644
index 000000000000..2aaa85a58094
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -0,0 +1,600 @@
+//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DIE.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrevData Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
+ // Explicitly cast to an integer type for which FoldingSetNodeID has
+ // overloads. Otherwise MSVC 2010 thinks this call is ambiguous.
+ ID.AddInteger(unsigned(Attribute));
+ ID.AddInteger(unsigned(Form));
+}
+
+//===----------------------------------------------------------------------===//
+// DIEAbbrev Implementation
+//===----------------------------------------------------------------------===//
+
+/// Profile - Used to gather unique data for the abbreviation folding set.
+///
+void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(unsigned(Tag));
+ ID.AddInteger(unsigned(Children));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i)
+ Data[i].Profile(ID);
+}
+
+/// Emit - Print the abbreviation using the specified asm printer.
+///
+void DIEAbbrev::Emit(const AsmPrinter *AP) const {
+ // Emit its Dwarf tag type.
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+
+ // Emit whether it has children DIEs.
+ AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children));
+
+ // For each attribute description.
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ const DIEAbbrevData &AttrData = Data[i];
+
+ // Emit attribute type.
+ AP->EmitULEB128(AttrData.getAttribute(),
+ dwarf::AttributeString(AttrData.getAttribute()));
+
+ // Emit form type.
+ AP->EmitULEB128(AttrData.getForm(),
+ dwarf::FormEncodingString(AttrData.getForm()));
+ }
+
+ // Mark end of abbreviation.
+ AP->EmitULEB128(0, "EOM(1)");
+ AP->EmitULEB128(0, "EOM(2)");
+}
+
+LLVM_DUMP_METHOD
+void DIEAbbrev::print(raw_ostream &O) {
+ O << "Abbreviation @"
+ << format("0x%lx", (long)(intptr_t)this)
+ << " "
+ << dwarf::TagString(Tag)
+ << " "
+ << dwarf::ChildrenString(Children)
+ << '\n';
+
+ for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+ O << " "
+ << dwarf::AttributeString(Data[i].getAttribute())
+ << " "
+ << dwarf::FormEncodingString(Data[i].getForm())
+ << '\n';
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEAbbrev::dump() { print(dbgs()); }
+
+DIEAbbrev DIE::generateAbbrev() const {
+ DIEAbbrev Abbrev(Tag, hasChildren());
+ for (const DIEValue &V : values())
+ Abbrev.AddAttribute(V.getAttribute(), V.getForm());
+ return Abbrev;
+}
+
+/// Climb up the parent chain to get the unit DIE to which this DIE
+/// belongs.
+const DIE *DIE::getUnit() const {
+ const DIE *Cu = getUnitOrNull();
+ assert(Cu && "We should not have orphaned DIEs.");
+ return Cu;
+}
+
+/// Climb up the parent chain to get the unit DIE this DIE belongs
+/// to. Return NULL if DIE is not added to an owner yet.
+const DIE *DIE::getUnitOrNull() const {
+ const DIE *p = this;
+ while (p) {
+ if (p->getTag() == dwarf::DW_TAG_compile_unit ||
+ p->getTag() == dwarf::DW_TAG_type_unit)
+ return p;
+ p = p->getParent();
+ }
+ return nullptr;
+}
+
+DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
+ // Iterate through all the attributes until we find the one we're
+ // looking for; if we can't find it, return an empty DIEValue.
+ for (const auto &V : values())
+ if (V.getAttribute() == Attribute)
+ return V;
+ return DIEValue();
+}
+
+LLVM_DUMP_METHOD
+static void printValues(raw_ostream &O, const DIEValueList &Values,
+ StringRef Type, unsigned Size, unsigned IndentCount) {
+ O << Type << ": Size: " << Size << "\n";
+
+ unsigned I = 0;
+ const std::string Indent(IndentCount, ' ');
+ for (const auto &V : Values.values()) {
+ O << Indent;
+ O << "Blk[" << I++ << "]";
+ O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
+ V.print(O);
+ O << "\n";
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
+ const std::string Indent(IndentCount, ' ');
+ O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this)
+ << ", Offset: " << Offset << ", Size: " << Size << "\n";
+
+ O << Indent << dwarf::TagString(getTag()) << " "
+ << dwarf::ChildrenString(hasChildren()) << "\n";
+
+ IndentCount += 2;
+ for (const auto &V : values()) {
+ O << Indent;
+ O << dwarf::AttributeString(V.getAttribute());
+ O << " " << dwarf::FormEncodingString(V.getForm()) << " ";
+ V.print(O);
+ O << "\n";
+ }
+ IndentCount -= 2;
+
+ for (const auto &Child : children())
+ Child.print(O, IndentCount + 4);
+
+ O << "\n";
+}
+
+LLVM_DUMP_METHOD
+void DIE::dump() {
+ print(dbgs());
+}
+
+void DIEValue::EmitValue(const AsmPrinter *AP) const {
+ switch (Ty) {
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+#define HANDLE_DIEVALUE(T) \
+ case is##T: \
+ getDIE##T().EmitValue(AP, Form); \
+ break;
+#include "llvm/CodeGen/DIEValue.def"
+ }
+}
+
+unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
+ switch (Ty) {
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+#define HANDLE_DIEVALUE(T) \
+ case is##T: \
+ return getDIE##T().SizeOf(AP, Form);
+#include "llvm/CodeGen/DIEValue.def"
+ }
+ llvm_unreachable("Unknown DIE kind");
+}
+
+LLVM_DUMP_METHOD
+void DIEValue::print(raw_ostream &O) const {
+ switch (Ty) {
+ case isNone:
+ llvm_unreachable("Expected valid DIEValue");
+#define HANDLE_DIEVALUE(T) \
+ case is##T: \
+ getDIE##T().print(O); \
+ break;
+#include "llvm/CodeGen/DIEValue.def"
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEValue::dump() const {
+ print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+ unsigned Size = ~0U;
+ switch (Form) {
+ case dwarf::DW_FORM_flag_present:
+ // Emit something to keep the lines and comments in sync.
+ // FIXME: Is there a better way to do this?
+ Asm->OutStreamer->AddBlankLine();
+ return;
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: Size = 1; break;
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: Size = 2; break;
+ case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: Size = 4; break;
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_ref_sig8: // Fall thru
+ case dwarf::DW_FORM_data8: Size = 8; break;
+ case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+ case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr:
+ Size = Asm->getPointerSize();
+ break;
+ case dwarf::DW_FORM_ref_addr:
+ Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
+ break;
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+ Asm->OutStreamer->EmitIntValue(Integer, Size);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
+///
+unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_flag_present: return 0;
+ case dwarf::DW_FORM_flag: // Fall thru
+ case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_data1: return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_data2: return sizeof(int16_t);
+ case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
+ case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_data4: return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8: // Fall thru
+ case dwarf::DW_FORM_ref_sig8: // Fall thru
+ case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer);
+ case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
+ case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr:
+ return AP->getPointerSize();
+ case dwarf::DW_FORM_ref_addr:
+ if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
+ return AP->getPointerSize();
+ return sizeof(int32_t);
+ default: llvm_unreachable("DIE Value form not supported yet");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEInteger::print(raw_ostream &O) const {
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEExpr Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit expression value.
+///
+void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of expression value in bytes.
+///
+unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getPointerSize();
+}
+
+LLVM_DUMP_METHOD
+void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
+
+//===----------------------------------------------------------------------===//
+// DIELabel Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit label value.
+///
+void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->EmitLabelReference(Label, SizeOf(AP, Form),
+ Form == dwarf::DW_FORM_strp ||
+ Form == dwarf::DW_FORM_sec_offset ||
+ Form == dwarf::DW_FORM_ref_addr);
+}
+
+/// SizeOf - Determine size of label value in bytes.
+///
+unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getPointerSize();
+}
+
+LLVM_DUMP_METHOD
+void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
+
+//===----------------------------------------------------------------------===//
+// DIEDelta Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit delta value.
+///
+void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_sec_offset) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
+ return AP->getPointerSize();
+}
+
+LLVM_DUMP_METHOD
+void DIEDelta::print(raw_ostream &O) const {
+ O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
+}
+
+//===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ assert(
+ (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
+ "Expected valid string form");
+
+ // Index of string in symbol table.
+ if (Form == dwarf::DW_FORM_GNU_str_index) {
+ DIEInteger(S.getIndex()).EmitValue(AP, Form);
+ return;
+ }
+
+ // Relocatable symbol.
+ assert(Form == dwarf::DW_FORM_strp);
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
+ DIELabel(S.getSymbol()).EmitValue(AP, Form);
+ return;
+ }
+
+ // Offset into symbol table.
+ DIEInteger(S.getOffset()).EmitValue(AP, Form);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ assert(
+ (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
+ "Expected valid string form");
+
+ // Index of string in symbol table.
+ if (Form == dwarf::DW_FORM_GNU_str_index)
+ return DIEInteger(S.getIndex()).SizeOf(AP, Form);
+
+ // Relocatable symbol.
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+ return DIELabel(S.getSymbol()).SizeOf(AP, Form);
+
+ // Offset into symbol table.
+ return DIEInteger(S.getOffset()).SizeOf(AP, Form);
+}
+
+LLVM_DUMP_METHOD
+void DIEString::print(raw_ostream &O) const {
+ O << "String: " << S.getString();
+}
+
+//===----------------------------------------------------------------------===//
+// DIEEntry Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit debug information entry offset.
+///
+void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+
+ if (Form == dwarf::DW_FORM_ref_addr) {
+ const DwarfDebug *DD = AP->getDwarfDebug();
+ unsigned Addr = Entry->getOffset();
+ assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations.");
+ // For DW_FORM_ref_addr, output the offset from beginning of debug info
+ // section. Entry->getOffset() returns the offset from start of the
+ // compile unit.
+ DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit());
+ assert(CU && "CUDie should belong to a CU.");
+ Addr += CU->getDebugInfoOffset();
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+ AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
+ DIEEntry::getRefAddrSize(AP));
+ else
+ AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
+ } else
+ AP->EmitInt32(Entry->getOffset());
+}
+
+unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
+ // DWARF4: References that use the attribute form DW_FORM_ref_addr are
+ // specified to be four bytes in the DWARF 32-bit format and eight bytes
+ // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
+ // references have the same size as an address on the target system.
+ const DwarfDebug *DD = AP->getDwarfDebug();
+ assert(DD && "Expected Dwarf Debug info to be available");
+ if (DD->getDwarfVersion() == 2)
+ return AP->getPointerSize();
+ return sizeof(int32_t);
+}
+
+LLVM_DUMP_METHOD
+void DIEEntry::print(raw_ostream &O) const {
+ O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
+}
+
+//===----------------------------------------------------------------------===//
+// DIELoc Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the location expression.
+///
+unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
+ if (!Size) {
+ for (const auto &V : values())
+ Size += V.SizeOf(AP);
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit location data.
+///
+void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+ switch (Form) {
+ default: llvm_unreachable("Improper form for block");
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block:
+ case dwarf::DW_FORM_exprloc:
+ Asm->EmitULEB128(Size); break;
+ }
+
+ for (const auto &V : values())
+ V.EmitValue(Asm);
+}
+
+/// SizeOf - Determine size of location data in bytes.
+///
+unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block:
+ case dwarf::DW_FORM_exprloc:
+ return Size + getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIELoc::print(raw_ostream &O) const {
+ printValues(O, *this, "ExprLoc", Size, 5);
+}
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
+ if (!Size) {
+ for (const auto &V : values())
+ Size += V.SizeOf(AP);
+ }
+
+ return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
+ switch (Form) {
+ default: llvm_unreachable("Improper form for block");
+ case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
+ case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
+ case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
+ case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
+ }
+
+ for (const auto &V : values())
+ V.EmitValue(Asm);
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+ case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+ case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+ case dwarf::DW_FORM_block: return Size + getULEB128Size(Size);
+ default: llvm_unreachable("Improper form for block");
+ }
+}
+
+LLVM_DUMP_METHOD
+void DIEBlock::print(raw_ostream &O) const {
+ printValues(O, *this, "Blk", Size, 5);
+}
+
+//===----------------------------------------------------------------------===//
+// DIELocList Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_data4)
+ return 4;
+ if (Form == dwarf::DW_FORM_sec_offset)
+ return 4;
+ return AP->getPointerSize();
+}
+
+/// EmitValue - Emit label value.
+///
+void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ DwarfDebug *DD = AP->getDwarfDebug();
+ MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
+ AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
+}
+
+LLVM_DUMP_METHOD
+void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
new file mode 100644
index 000000000000..74c47d151c62
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -0,0 +1,514 @@
+//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ByteStreamer.h"
+#include "DIEHash.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+/// \brief Grabs the string in whichever attribute is passed in and returns
+/// a reference to it.
+static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
+ // Iterate through all the attributes until we find the one we're
+ // looking for; if we can't find it, return an empty string.
+ for (const auto &V : Die.values())
+ if (V.getAttribute() == Attr)
+ return V.getDIEString().getString();
+
+ return StringRef("");
+}
+
+/// \brief Adds the string in \p Str to the hash. This also hashes
+/// a trailing NULL with the string.
+void DIEHash::addString(StringRef Str) {
+ DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+ Hash.update(Str);
+ Hash.update(makeArrayRef((uint8_t)'\0'));
+}
+
+// FIXME: The LEB128 routines are copied and only slightly modified out of
+// LEB128.h.
+
+/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+void DIEHash::addULEB128(uint64_t Value) {
+ DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
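+ // Standard ULEB128: emit the low seven bits of the value at a time, setting
+ // the high bit on every byte except the last (e.g. 300 == 0x12C encodes as
+ // 0xAC 0x02).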
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ if (Value != 0)
+ Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+ Hash.update(Byte);
+ } while (Value != 0);
+}
+
+void DIEHash::addSLEB128(int64_t Value) {
+ DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n");
+ bool More;
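+ // Signed LEB128: keep emitting groups of seven bits until the remaining
+ // value is all sign bits and the sign is already captured in bit 6 of the
+ // last byte (e.g. -2 encodes as the single byte 0x7E).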
+ do {
+ uint8_t Byte = Value & 0x7f;
+ Value >>= 7;
+ More = !((((Value == 0) && ((Byte & 0x40) == 0)) ||
+ ((Value == -1) && ((Byte & 0x40) != 0))));
+ if (More)
+ Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+ Hash.update(Byte);
+ } while (More);
+}
+
+/// \brief Adds \p Parent and all of its enclosing types and namespaces to the
+/// hash, outermost first.
+void DIEHash::addParentContext(const DIE &Parent) {
+
+ DEBUG(dbgs() << "Adding parent context to hash...\n");
+
+ // [7.27.2] For each surrounding type or namespace beginning with the
+ // outermost such construct...
+ SmallVector<const DIE *, 1> Parents;
+ const DIE *Cur = &Parent;
+ while (Cur->getParent()) {
+ Parents.push_back(Cur);
+ Cur = Cur->getParent();
+ }
+ assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||
+ Cur->getTag() == dwarf::DW_TAG_type_unit);
+
+ // Reverse iterate over our list to go from the outermost construct to the
+ // innermost.
+ for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
+ E = Parents.rend();
+ I != E; ++I) {
+ const DIE &Die = **I;
+
+ // ... Append the letter "C" to the sequence...
+ addULEB128('C');
+
+ // ... Followed by the DWARF tag of the construct...
+ addULEB128(Die.getTag());
+
+ // ... Then the name, taken from the DW_AT_name attribute.
+ StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+ DEBUG(dbgs() << "... adding context: " << Name << "\n");
+ if (!Name.empty())
+ addString(Name);
+ }
+}
+
+// Collect all of the attributes for a particular DIE in single structure.
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
+#define COLLECT_ATTR(NAME) \
+ case dwarf::NAME: \
+ Attrs.NAME = V; \
+ break
+
+ for (const auto &V : Die.values()) {
+ DEBUG(dbgs() << "Attribute: "
+ << dwarf::AttributeString(V.getAttribute())
+ << " added.\n");
+ switch (V.getAttribute()) {
+ COLLECT_ATTR(DW_AT_name);
+ COLLECT_ATTR(DW_AT_accessibility);
+ COLLECT_ATTR(DW_AT_address_class);
+ COLLECT_ATTR(DW_AT_allocated);
+ COLLECT_ATTR(DW_AT_artificial);
+ COLLECT_ATTR(DW_AT_associated);
+ COLLECT_ATTR(DW_AT_binary_scale);
+ COLLECT_ATTR(DW_AT_bit_offset);
+ COLLECT_ATTR(DW_AT_bit_size);
+ COLLECT_ATTR(DW_AT_bit_stride);
+ COLLECT_ATTR(DW_AT_byte_size);
+ COLLECT_ATTR(DW_AT_byte_stride);
+ COLLECT_ATTR(DW_AT_const_expr);
+ COLLECT_ATTR(DW_AT_const_value);
+ COLLECT_ATTR(DW_AT_containing_type);
+ COLLECT_ATTR(DW_AT_count);
+ COLLECT_ATTR(DW_AT_data_bit_offset);
+ COLLECT_ATTR(DW_AT_data_location);
+ COLLECT_ATTR(DW_AT_data_member_location);
+ COLLECT_ATTR(DW_AT_decimal_scale);
+ COLLECT_ATTR(DW_AT_decimal_sign);
+ COLLECT_ATTR(DW_AT_default_value);
+ COLLECT_ATTR(DW_AT_digit_count);
+ COLLECT_ATTR(DW_AT_discr);
+ COLLECT_ATTR(DW_AT_discr_list);
+ COLLECT_ATTR(DW_AT_discr_value);
+ COLLECT_ATTR(DW_AT_encoding);
+ COLLECT_ATTR(DW_AT_enum_class);
+ COLLECT_ATTR(DW_AT_endianity);
+ COLLECT_ATTR(DW_AT_explicit);
+ COLLECT_ATTR(DW_AT_is_optional);
+ COLLECT_ATTR(DW_AT_location);
+ COLLECT_ATTR(DW_AT_lower_bound);
+ COLLECT_ATTR(DW_AT_mutable);
+ COLLECT_ATTR(DW_AT_ordering);
+ COLLECT_ATTR(DW_AT_picture_string);
+ COLLECT_ATTR(DW_AT_prototyped);
+ COLLECT_ATTR(DW_AT_small);
+ COLLECT_ATTR(DW_AT_segment);
+ COLLECT_ATTR(DW_AT_string_length);
+ COLLECT_ATTR(DW_AT_threads_scaled);
+ COLLECT_ATTR(DW_AT_upper_bound);
+ COLLECT_ATTR(DW_AT_use_location);
+ COLLECT_ATTR(DW_AT_use_UTF8);
+ COLLECT_ATTR(DW_AT_variable_parameter);
+ COLLECT_ATTR(DW_AT_virtuality);
+ COLLECT_ATTR(DW_AT_visibility);
+ COLLECT_ATTR(DW_AT_vtable_elem_location);
+ COLLECT_ATTR(DW_AT_type);
+ default:
+ break;
+ }
+ }
+}
+
+void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,
+ const DIE &Entry, StringRef Name) {
+ // append the letter 'N'
+ addULEB128('N');
+
+ // the DWARF attribute code (DW_AT_type or DW_AT_friend),
+ addULEB128(Attribute);
+
+ // the context of the tag,
+ if (const DIE *Parent = Entry.getParent())
+ addParentContext(*Parent);
+
+ // the letter 'E',
+ addULEB128('E');
+
+ // and the name of the type.
+ addString(Name);
+
+ // Currently DW_TAG_friends are not used by Clang, but if they do become so,
+ // here's the relevant spec text to implement:
+ //
+ // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,
+ // the context is omitted and the name to be used is the ABI-specific name
+ // of the subprogram (e.g., the mangled linker name).
+}
+
+void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,
+ unsigned DieNumber) {
+ // a) If T is in the list of [previously hashed types], use the letter
+ // 'R' as the marker
+ addULEB128('R');
+
+ addULEB128(Attribute);
+
+ // and use the unsigned LEB128 encoding of [the index of T in the
+ // list] as the attribute value;
+ addULEB128(DieNumber);
+}
+
+void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+ const DIE &Entry) {
+ assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend "
+ "tags. Add support here when there's "
+ "a use case");
+ // Step 5
+ // If the tag in Step 3 is one of [the below tags]
+ if ((Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_reference_type ||
+ Tag == dwarf::DW_TAG_rvalue_reference_type ||
+ Tag == dwarf::DW_TAG_ptr_to_member_type) &&
+ // and the referenced type (via the [below attributes])
+ // FIXME: This seems overly restrictive, and causes hash mismatches when
+ // there's a decl/def difference in the containing type of a
+ // ptr_to_member_type, but it's what DWARF says, for some reason.
+ Attribute == dwarf::DW_AT_type) {
+ // ... has a DW_AT_name attribute,
+ StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);
+ if (!Name.empty()) {
+ hashShallowTypeReference(Attribute, Entry, Name);
+ return;
+ }
+ }
+
+ unsigned &DieNumber = Numbering[&Entry];
+ if (DieNumber) {
+ hashRepeatedTypeReference(Attribute, DieNumber);
+ return;
+ }
+
+ // otherwise, b) use the letter 'T' as the marker, ...
+ addULEB128('T');
+
+ addULEB128(Attribute);
+
+ // ... process the type T recursively by performing Steps 2 through 7, and
+ // use the result as the attribute value.
+ DieNumber = Numbering.size();
+ computeHash(Entry);
+}
+
+// Hash all of the values in a block-like set of values. This assumes that
+// all of the data is going to be added as integers.
+void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
+ for (const auto &V : Values)
+ Hash.update((uint64_t)V.getDIEInteger().getValue());
+}
+
+// Hash the contents of a loclistptr class.
+void DIEHash::hashLocList(const DIELocList &LocList) {
+ HashingByteStreamer Streamer(*this);
+ DwarfDebug &DD = *AP->getDwarfDebug();
+ const DebugLocStream &Locs = DD.getDebugLocs();
+ for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
+ DD.emitDebugLocEntry(Streamer, Entry);
+}
+
+// Hash an individual attribute \param Value based on the type of attribute and
+// its form.
+void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
+ dwarf::Attribute Attribute = Value.getAttribute();
+
+ // Other attribute values use the letter 'A' as the marker, and the value
+ // consists of the form code (encoded as an unsigned LEB128 value) followed by
+ // the encoding of the value according to the form code. To ensure
+ // reproducibility of the signature, the set of forms used in the signature
+ // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
+ // DW_FORM_string, and DW_FORM_block.
+
+ switch (Value.getType()) {
+ case DIEValue::isNone:
+ llvm_unreachable("Expected valid DIEValue");
+
+ // 7.27 Step 3
+ // ... An attribute that refers to another type entry T is processed as
+ // follows:
+ case DIEValue::isEntry:
+ hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry());
+ break;
+ case DIEValue::isInteger: {
+ addULEB128('A');
+ addULEB128(Attribute);
+ switch (Value.getForm()) {
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_udata:
+ case dwarf::DW_FORM_sdata:
+ addULEB128(dwarf::DW_FORM_sdata);
+ addSLEB128((int64_t)Value.getDIEInteger().getValue());
+ break;
+ // DW_FORM_flag_present is just a flag with a value of one. We still give it
+ // a value, so just use that value.
+ case dwarf::DW_FORM_flag_present:
+ case dwarf::DW_FORM_flag:
+ addULEB128(dwarf::DW_FORM_flag);
+ addULEB128((int64_t)Value.getDIEInteger().getValue());
+ break;
+ default:
+ llvm_unreachable("Unknown integer form!");
+ }
+ break;
+ }
+ case DIEValue::isString:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_string);
+ addString(Value.getDIEString().getString());
+ break;
+ case DIEValue::isBlock:
+ case DIEValue::isLoc:
+ case DIEValue::isLocList:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_block);
+ if (Value.getType() == DIEValue::isBlock) {
+ addULEB128(Value.getDIEBlock().ComputeSize(AP));
+ hashBlockData(Value.getDIEBlock().values());
+ } else if (Value.getType() == DIEValue::isLoc) {
+ addULEB128(Value.getDIELoc().ComputeSize(AP));
+ hashBlockData(Value.getDIELoc().values());
+ } else {
+ // We could add the block length, but that would take a bit of work and
+ // would not add much uniqueness to the hash in any way we could test.
+ hashLocList(Value.getDIELocList());
+ }
+ break;
+ // FIXME: It's uncertain whether or not we should handle this at the moment.
+ case DIEValue::isExpr:
+ case DIEValue::isLabel:
+ case DIEValue::isDelta:
+ llvm_unreachable("Add support for additional value types.");
+ }
+}
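+
+// Worked example (values assumed): a DW_AT_byte_size of 4 stored with
+// DW_FORM_data1 hashes exactly like one stored with DW_FORM_udata, because
+// both are canonicalized above to
+//
+//   ULEB('A'), ULEB(DW_AT_byte_size), ULEB(DW_FORM_sdata), SLEB(4)
+//
+// so the signature does not depend on which data form the producer picked.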
+
+// Go through the attributes from \param Attrs in the order specified in 7.27.4
+// and hash them.
+void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
+#define ADD_ATTR(ATTR) \
+ { \
+ if (ATTR) \
+ hashAttribute(ATTR, Tag); \
+ }
+
+ ADD_ATTR(Attrs.DW_AT_name);
+ ADD_ATTR(Attrs.DW_AT_accessibility);
+ ADD_ATTR(Attrs.DW_AT_address_class);
+ ADD_ATTR(Attrs.DW_AT_allocated);
+ ADD_ATTR(Attrs.DW_AT_artificial);
+ ADD_ATTR(Attrs.DW_AT_associated);
+ ADD_ATTR(Attrs.DW_AT_binary_scale);
+ ADD_ATTR(Attrs.DW_AT_bit_offset);
+ ADD_ATTR(Attrs.DW_AT_bit_size);
+ ADD_ATTR(Attrs.DW_AT_bit_stride);
+ ADD_ATTR(Attrs.DW_AT_byte_size);
+ ADD_ATTR(Attrs.DW_AT_byte_stride);
+ ADD_ATTR(Attrs.DW_AT_const_expr);
+ ADD_ATTR(Attrs.DW_AT_const_value);
+ ADD_ATTR(Attrs.DW_AT_containing_type);
+ ADD_ATTR(Attrs.DW_AT_count);
+ ADD_ATTR(Attrs.DW_AT_data_bit_offset);
+ ADD_ATTR(Attrs.DW_AT_data_location);
+ ADD_ATTR(Attrs.DW_AT_data_member_location);
+ ADD_ATTR(Attrs.DW_AT_decimal_scale);
+ ADD_ATTR(Attrs.DW_AT_decimal_sign);
+ ADD_ATTR(Attrs.DW_AT_default_value);
+ ADD_ATTR(Attrs.DW_AT_digit_count);
+ ADD_ATTR(Attrs.DW_AT_discr);
+ ADD_ATTR(Attrs.DW_AT_discr_list);
+ ADD_ATTR(Attrs.DW_AT_discr_value);
+ ADD_ATTR(Attrs.DW_AT_encoding);
+ ADD_ATTR(Attrs.DW_AT_enum_class);
+ ADD_ATTR(Attrs.DW_AT_endianity);
+ ADD_ATTR(Attrs.DW_AT_explicit);
+ ADD_ATTR(Attrs.DW_AT_is_optional);
+ ADD_ATTR(Attrs.DW_AT_location);
+ ADD_ATTR(Attrs.DW_AT_lower_bound);
+ ADD_ATTR(Attrs.DW_AT_mutable);
+ ADD_ATTR(Attrs.DW_AT_ordering);
+ ADD_ATTR(Attrs.DW_AT_picture_string);
+ ADD_ATTR(Attrs.DW_AT_prototyped);
+ ADD_ATTR(Attrs.DW_AT_small);
+ ADD_ATTR(Attrs.DW_AT_segment);
+ ADD_ATTR(Attrs.DW_AT_string_length);
+ ADD_ATTR(Attrs.DW_AT_threads_scaled);
+ ADD_ATTR(Attrs.DW_AT_upper_bound);
+ ADD_ATTR(Attrs.DW_AT_use_location);
+ ADD_ATTR(Attrs.DW_AT_use_UTF8);
+ ADD_ATTR(Attrs.DW_AT_variable_parameter);
+ ADD_ATTR(Attrs.DW_AT_virtuality);
+ ADD_ATTR(Attrs.DW_AT_visibility);
+ ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
+ ADD_ATTR(Attrs.DW_AT_type);
+
+ // FIXME: Add the extended attributes.
+}
+
+// Add all of the attributes for \param Die to the hash.
+void DIEHash::addAttributes(const DIE &Die) {
+ DIEAttrs Attrs = {};
+ collectAttributes(Die, Attrs);
+ hashAttributes(Attrs, Die.getTag());
+}
+
+void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
+ // 7.27 Step 7
+ // ... append the letter 'S',
+ addULEB128('S');
+
+ // the tag of C,
+ addULEB128(Die.getTag());
+
+ // and the name.
+ addString(Name);
+}
+
+// Compute the hash of a DIE. This is based on the type signature computation
+// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
+// flattened description of the DIE.
+void DIEHash::computeHash(const DIE &Die) {
+ // Append the letter 'D', followed by the DWARF tag of the DIE.
+ addULEB128('D');
+ addULEB128(Die.getTag());
+
+ // Add each of the attributes of the DIE.
+ addAttributes(Die);
+
+ // Then hash each of the children of the DIE.
+ for (auto &C : Die.children()) {
+ // 7.27 Step 7
+ // If C is a nested type entry or a member function entry, ...
+ if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) {
+ StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
+ // ... and has a DW_AT_name attribute
+ if (!Name.empty()) {
+ hashNestedType(C, Name);
+ continue;
+ }
+ }
+ computeHash(C);
+ }
+
+ // Following the last child (or if there are no children), append a zero byte.
+ Hash.update(makeArrayRef((uint8_t)'\0'));
+}
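+
+// Editorial illustration (assumption): a structure DIE with a single data
+// member and no other children flattens roughly as
+//
+//   'D', DW_TAG_structure_type, <its attributes>,
+//     'D', DW_TAG_member, <its attributes>, 0,  // member, then its terminator
+//   0                                           // terminator for the struct
+//
+// Named nested types and member functions would instead contribute only the
+// short 'S' form produced by hashNestedType() above.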
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of the full CU and all top level CU entities.
+// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
+uint64_t DIEHash::computeCUSignature(const DIE &Die) {
+ Numbering.clear();
+ Numbering[&Die] = 1;
+
+ // Hash the DIE.
+ computeHash(Die);
+
+ // Now return the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so swap bytes
+ // appropriately.
+ return support::endian::read64le(Result + 8);
+}
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of additional forms not specifically called out in the
+/// standard.
+uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
+ Numbering.clear();
+ Numbering[&Die] = 1;
+
+ if (const DIE *Parent = Die.getParent())
+ addParentContext(*Parent);
+
+ // Hash the DIE.
+ computeHash(Die);
+
+ // Now return the result.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little endian, so swap bytes
+ // appropriately.
+ return support::endian::read64le(Result + 8);
+}
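+
+// Hypothetical usage sketch (names assumed, not taken from this patch):
+//
+//   DIEHash Hasher(Asm);
+//   uint64_t Signature = Hasher.computeTypeSignature(TypeDie);
+//
+// The 64-bit result is suitable for use as a DWARF type signature, e.g. for
+// DW_FORM_ref_sig8 references to a type unit.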
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
new file mode 100644
index 000000000000..996cd7ef3d2e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -0,0 +1,159 @@
+//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class CompileUnit;
+
+/// \brief An object containing the capability of hashing and adding hash
+/// attributes onto a DIE.
+class DIEHash {
+ // Collection of all attributes used in hashing a particular DIE.
+ struct DIEAttrs {
+ DIEValue DW_AT_name;
+ DIEValue DW_AT_accessibility;
+ DIEValue DW_AT_address_class;
+ DIEValue DW_AT_allocated;
+ DIEValue DW_AT_artificial;
+ DIEValue DW_AT_associated;
+ DIEValue DW_AT_binary_scale;
+ DIEValue DW_AT_bit_offset;
+ DIEValue DW_AT_bit_size;
+ DIEValue DW_AT_bit_stride;
+ DIEValue DW_AT_byte_size;
+ DIEValue DW_AT_byte_stride;
+ DIEValue DW_AT_const_expr;
+ DIEValue DW_AT_const_value;
+ DIEValue DW_AT_containing_type;
+ DIEValue DW_AT_count;
+ DIEValue DW_AT_data_bit_offset;
+ DIEValue DW_AT_data_location;
+ DIEValue DW_AT_data_member_location;
+ DIEValue DW_AT_decimal_scale;
+ DIEValue DW_AT_decimal_sign;
+ DIEValue DW_AT_default_value;
+ DIEValue DW_AT_digit_count;
+ DIEValue DW_AT_discr;
+ DIEValue DW_AT_discr_list;
+ DIEValue DW_AT_discr_value;
+ DIEValue DW_AT_encoding;
+ DIEValue DW_AT_enum_class;
+ DIEValue DW_AT_endianity;
+ DIEValue DW_AT_explicit;
+ DIEValue DW_AT_is_optional;
+ DIEValue DW_AT_location;
+ DIEValue DW_AT_lower_bound;
+ DIEValue DW_AT_mutable;
+ DIEValue DW_AT_ordering;
+ DIEValue DW_AT_picture_string;
+ DIEValue DW_AT_prototyped;
+ DIEValue DW_AT_small;
+ DIEValue DW_AT_segment;
+ DIEValue DW_AT_string_length;
+ DIEValue DW_AT_threads_scaled;
+ DIEValue DW_AT_upper_bound;
+ DIEValue DW_AT_use_location;
+ DIEValue DW_AT_use_UTF8;
+ DIEValue DW_AT_variable_parameter;
+ DIEValue DW_AT_virtuality;
+ DIEValue DW_AT_visibility;
+ DIEValue DW_AT_vtable_elem_location;
+ DIEValue DW_AT_type;
+
+ // Insert any additional ones here...
+ };
+
+public:
+ DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
+
+ /// \brief Computes the CU signature.
+ uint64_t computeCUSignature(const DIE &Die);
+
+ /// \brief Computes the type signature.
+ uint64_t computeTypeSignature(const DIE &Die);
+
+ // Helper routines to process parts of a DIE.
+private:
+ /// \brief Adds the parent context of \param Die to the hash.
+ void addParentContext(const DIE &Die);
+
+ /// \brief Adds the attributes of \param Die to the hash.
+ void addAttributes(const DIE &Die);
+
+ /// \brief Computes the full DWARF4 7.27 hash of the DIE.
+ void computeHash(const DIE &Die);
+
+ // Routines that add DIEValues to the hash.
+public:
+ /// \brief Adds \param Value to the hash.
+ void update(uint8_t Value) { Hash.update(Value); }
+
+ /// \brief Encodes and adds \param Value to the hash as a ULEB128.
+ void addULEB128(uint64_t Value);
+
+ /// \brief Encodes and adds \param Value to the hash as a SLEB128.
+ void addSLEB128(int64_t Value);
+
+private:
+ /// \brief Adds \param Str to the hash and includes a NULL byte.
+ void addString(StringRef Str);
+
+ /// \brief Collects the attributes of DIE \param Die into the \param Attrs
+ /// structure.
+ void collectAttributes(const DIE &Die, DIEAttrs &Attrs);
+
+ /// \brief Hashes the attributes in \param Attrs in order.
+ void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
+
+ /// \brief Hashes the data in a block-like DIEValue, e.g. DW_FORM_block or
+ /// DW_FORM_exprloc.
+ void hashBlockData(const DIE::const_value_range &Values);
+
+ /// \brief Hashes the contents pointed to in the .debug_loc section.
+ void hashLocList(const DIELocList &LocList);
+
+ /// \brief Hashes an individual attribute.
+ void hashAttribute(const DIEValue &Value, dwarf::Tag Tag);
+
+ /// \brief Hashes an attribute that refers to another DIE.
+ void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+ const DIE &Entry);
+
+ /// \brief Hashes a reference to a named type in such a way that it is
+ /// independent of whether that type is described by a declaration or a
+ /// definition.
+ void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
+ StringRef Name);
+
+ /// \brief Hashes a reference to a previously referenced type DIE.
+ void hashRepeatedTypeReference(dwarf::Attribute Attribute,
+ unsigned DieNumber);
+
+ void hashNestedType(const DIE &Die, StringRef Name);
+
+private:
+ MD5 Hash;
+ AsmPrinter *AP;
+ DenseMap<const DIE *, unsigned> Numbering;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
new file mode 100644
index 000000000000..adc536f1add8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -0,0 +1,246 @@
+//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+// \brief If @MI is a DBG_VALUE whose debug value is described by a
+// defined register, returns the number of that register.
+// Otherwise, returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ assert(MI.getNumOperands() == 4);
+ // If the location of the variable is described using a register (directly
+ // or indirectly), that register is always the first operand.
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
+ const MachineInstr &MI) {
+ // Instruction range should start with a DBG_VALUE instruction for the
+ // variable.
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ auto &Ranges = VarInstrRanges[Var];
+ if (!Ranges.empty() && Ranges.back().second == nullptr &&
+ Ranges.back().first->isIdenticalTo(MI)) {
+ DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << Ranges.back().first << "\t" << MI << "\n");
+ return;
+ }
+ Ranges.push_back(std::make_pair(&MI, nullptr));
+}
+
+void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
+ const MachineInstr &MI) {
+ auto &Ranges = VarInstrRanges[Var];
+ // Verify that the current instruction range is not yet closed.
+ assert(!Ranges.empty() && Ranges.back().second == nullptr);
+ // For now, instruction ranges are not allowed to cross basic block
+ // boundaries.
+ assert(Ranges.back().first->getParent() == MI.getParent());
+ Ranges.back().second = &MI;
+}
+
+unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const {
+ const auto &I = VarInstrRanges.find(Var);
+ if (I == VarInstrRanges.end())
+ return 0;
+ const auto &Ranges = I->second;
+ if (Ranges.empty() || Ranges.back().second != nullptr)
+ return 0;
+ return isDescribedByReg(*Ranges.back().first);
+}
+
+namespace {
+// Maps physreg numbers to the variables they describe.
+typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap;
+}
+
+// \brief Claim that @Var is not described by @RegNo anymore.
+static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedVariable Var) {
+ const auto &I = RegVars.find(RegNo);
+ assert(RegNo != 0U && I != RegVars.end());
+ auto &VarSet = I->second;
+ const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var);
+ assert(VarPos != VarSet.end());
+ VarSet.erase(VarPos);
+ // Don't keep empty sets in a map to keep it as small as possible.
+ if (VarSet.empty())
+ RegVars.erase(I);
+}
+
+// \brief Claim that @Var is now described by @RegNo.
+static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ InlinedVariable Var) {
+ assert(RegNo != 0U);
+ auto &VarSet = RegVars[RegNo];
+ assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end());
+ VarSet.push_back(Var);
+}
+
+// \brief Terminate the location range for variables described by the register
+// at @I by inserting @ClobberingInstr into their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
+ RegDescribedVarsMap::iterator I,
+ DbgValueHistoryMap &HistMap,
+ const MachineInstr &ClobberingInstr) {
+ // Iterate over all variables described by this register and add this
+ // instruction to their history, clobbering it.
+ for (const auto &Var : I->second)
+ HistMap.endInstrRange(Var, ClobberingInstr);
+ RegVars.erase(I);
+}
+
+// \brief Terminate the location range for variables described by register
+// @RegNo by inserting @ClobberingInstr into their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ DbgValueHistoryMap &HistMap,
+ const MachineInstr &ClobberingInstr) {
+ const auto &I = RegVars.find(RegNo);
+ if (I == RegVars.end())
+ return;
+ clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
+}
+
+// \brief Returns the first instruction in @MBB which corresponds to
+// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
+ auto LastMI = MBB.getLastNonDebugInstr();
+ if (LastMI == MBB.end() || !LastMI->isReturn())
+ return nullptr;
+ // Assume that the epilogue starts with the instruction that has the same
+ // debug location as the return instruction.
+ DebugLoc LastLoc = LastMI->getDebugLoc();
+ auto Res = LastMI;
+ for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)),
+ E = MBB.rend();
+ I != E; ++I) {
+ if (I->getDebugLoc() != LastLoc)
+ return &*Res;
+ Res = &*I;
+ }
+ // If all instructions have the same debug location, assume the whole MBB is
+ // an epilogue.
+ return &*MBB.begin();
+}
+
+// \brief Collect registers that are modified in the function body (their
+// contents are changed outside of the prologue and epilogue).
+static void collectChangingRegs(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ BitVector &Regs) {
+ for (const auto &MBB : *MF) {
+ auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
+
+ for (const auto &MI : MBB) {
+ // Avoid looking at prologue or epilogue instructions.
+ if (&MI == FirstEpilogueInst)
+ break;
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ continue;
+
+ // Look for register defs and register masks. Register masks are
+ // typically on calls and they clobber everything not in the mask.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ Regs.set(*AI);
+ } else if (MO.isRegMask()) {
+ Regs.setBitsNotInMask(MO.getRegMask());
+ }
+ }
+ }
+ }
+}
+
+void llvm::calculateDbgValueHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &Result) {
+ BitVector ChangingRegs(TRI->getNumRegs());
+ collectChangingRegs(MF, TRI, ChangingRegs);
+
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ RegDescribedVarsMap RegVars;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isDebugValue()) {
+ // Not a DBG_VALUE instruction. It may clobber registers which describe
+ // some variables.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // If this is a register def operand, it may end a debug value
+ // range.
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ if (ChangingRegs.test(*AI))
+ clobberRegisterUses(RegVars, *AI, Result, MI);
+ } else if (MO.isRegMask()) {
+ // If this is a register mask operand, clobber all debug values in
+ // non-CSRs.
+ for (int I = ChangingRegs.find_first(); I != -1;
+ I = ChangingRegs.find_next(I)) {
+ // Don't consider SP to be clobbered by register masks.
+ if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
+ MO.clobbersPhysReg(I)) {
+ clobberRegisterUses(RegVars, I, Result, MI);
+ }
+ }
+ }
+ }
+ continue;
+ }
+
+ assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
+ // Use the base variable (without any DW_OP_piece expressions)
+ // as index into History. The full variables including the
+ // piece expressions are attached to the MI.
+ const DILocalVariable *RawVar = MI.getDebugVariable();
+ assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt());
+
+ if (unsigned PrevReg = Result.getRegisterForVar(Var))
+ dropRegDescribedVar(RegVars, PrevReg, Var);
+
+ Result.startInstrRange(Var, MI);
+
+ if (unsigned NewReg = isDescribedByReg(MI))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ }
+
+ // Make sure locations for register-described variables are valid only
+ // until the end of the basic block (unless it's the last basic block, in
+ // which case let their liveness run off to the end of the function).
+ if (!MBB.empty() && &MBB != &MF->back()) {
+ for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
+ auto CurElem = I++; // CurElem can be erased below.
+ if (ChangingRegs.test(CurElem->first))
+ clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
+ }
+ }
+ }
+}
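+
+// Illustrative example (hypothetical MIR, not part of this patch): given
+//
+//   DBG_VALUE %eax, 0, !"x", !expr   ; "x" currently lives in EAX
+//   ...
+//   %eax = ADD32ri %eax, 1           ; defines (clobbers) EAX
+//
+// the history map ends up with one range for "x" that opens at the DBG_VALUE
+// and is closed by the ADD, because the ADD defines a register that
+// collectChangingRegs() marked as changing.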
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
new file mode 100644
index 000000000000..16d2d7fd7e99
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -0,0 +1,59 @@
+//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class TargetRegisterInfo;
+
+// For each user variable, keep a list of instruction ranges where this variable
+// is accessible. The variables are listed in order of appearance.
+class DbgValueHistoryMap {
+ // Each instruction range starts with a DBG_VALUE instruction, specifying the
+ // location of a variable, which is assumed to be valid until the end of the
+ // range. If the end is not specified, the location is valid until the start
+ // instruction of the next instruction range, or until the end of the
+ // function.
+public:
+ typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
+ typedef SmallVector<InstrRange, 4> InstrRanges;
+ typedef std::pair<const DILocalVariable *, const DILocation *>
+ InlinedVariable;
+ typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap;
+
+private:
+ InstrRangesMap VarInstrRanges;
+
+public:
+ void startInstrRange(InlinedVariable Var, const MachineInstr &MI);
+ void endInstrRange(InlinedVariable Var, const MachineInstr &MI);
+ // Returns the register currently describing @Var. If @Var is currently
+ // inaccessible or is not described by a register, returns 0.
+ unsigned getRegisterForVar(InlinedVariable Var) const;
+
+ bool empty() const { return VarInstrRanges.empty(); }
+ void clear() { VarInstrRanges.clear(); }
+ InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
+ InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
+};
+
+void calculateDbgValueHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &Result);
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
new file mode 100644
index 000000000000..16ffe2e15acd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -0,0 +1,230 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for different debug information format backends.
+// LLVM currently supports DWARF and CodeView.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DebugHandlerBase.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+
+// Each LexicalScope has a first and last instruction to mark the beginning
+// and end of the scope, respectively. Create an inverse map that lists the
+// scopes that start (and end) at an instruction. One instruction may start
+// (or end) multiple scopes. Ignore scopes that are not reachable.
+void DebugHandlerBase::identifyScopeMarkers() {
+ SmallVector<LexicalScope *, 4> WorkList;
+ WorkList.push_back(LScopes.getCurrentFunctionScope());
+ while (!WorkList.empty()) {
+ LexicalScope *S = WorkList.pop_back_val();
+
+ const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
+ if (!Children.empty())
+ WorkList.append(Children.begin(), Children.end());
+
+ if (S->isAbstractScope())
+ continue;
+
+ for (const InsnRange &R : S->getRanges()) {
+ assert(R.first && "InsnRange does not have first instruction!");
+ assert(R.second && "InsnRange does not have second instruction!");
+ requestLabelBeforeInsn(R.first);
+ requestLabelAfterInsn(R.second);
+ }
+ }
+}
+
+// Return Label preceding the instruction.
+MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) {
+ MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
+ assert(Label && "Didn't insert label before instruction");
+ return Label;
+}
+
+// Return Label immediately following the instruction.
+MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
+ return LabelsAfterInsn.lookup(MI);
+}
+
+// Determine the relative position of the pieces described by P1 and P2.
+// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
+// 1 if P1 is entirely after P2.
+int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) {
+ unsigned l1 = P1->getBitPieceOffset();
+ unsigned l2 = P2->getBitPieceOffset();
+ unsigned r1 = l1 + P1->getBitPieceSize();
+ unsigned r2 = l2 + P2->getBitPieceSize();
+ if (r1 <= l2)
+ return -1;
+ else if (r2 <= l1)
+ return 1;
+ else
+ return 0;
+}
+
+/// Determine whether two variable pieces overlap.
+bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+ if (!P1->isBitPiece() || !P2->isBitPiece())
+ return true;
+ return pieceCmp(P1, P2) == 0;
+}
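+
+// Worked example (bit ranges assumed): a piece covering bits [0, 32) and one
+// covering bits [32, 64) do not overlap (pieceCmp returns -1), while [0, 32)
+// and [16, 48) do (pieceCmp returns 0); only the latter pair is treated as
+// describing the same part of the variable.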
+
+/// If this type is derived from a base type then return base type size.
+uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
+ DIType *Ty = TyRef.resolve();
+ assert(Ty);
+ DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
+ if (!DDTy)
+ return Ty->getSizeInBits();
+
+ unsigned Tag = DDTy->getTag();
+
+ if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+ Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_restrict_type)
+ return DDTy->getSizeInBits();
+
+ DIType *BaseType = DDTy->getBaseType().resolve();
+
+ assert(BaseType && "Unexpected invalid base type");
+
+ // If this is a derived type, go ahead and get the base type, unless it's a
+ // reference, in which case it's just the size of the field. Pointer types
+ // have no need of this since they're a different kind of qualification on
+ // the type.
+ if (BaseType->getTag() == dwarf::DW_TAG_reference_type ||
+ BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type)
+ return Ty->getSizeInBits();
+
+ return getBaseTypeSize(BaseType);
+}
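+
+// Illustrative example (assumption): for `typedef const int T;` the DIE chain
+// is DW_TAG_typedef -> DW_TAG_const_type -> base type `int`, and
+// getBaseTypeSize() walks through both wrappers to return the size of `int`
+// (typically 32 bits) rather than the usually absent size on the qualifier
+// DIEs themselves.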
+
+void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
+ // Grab the lexical scopes for the function, if we don't have any of those
+ // then we're not going to be able to do anything.
+ LScopes.initialize(*MF);
+ if (LScopes.empty())
+ return;
+
+ // Make sure that each lexical scope will have a begin/end label.
+ identifyScopeMarkers();
+
+ // Calculate history for local variables.
+ assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
+ calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
+ DbgValues);
+
+ // Request labels for the full history.
+ for (const auto &I : DbgValues) {
+ const auto &Ranges = I.second;
+ if (Ranges.empty())
+ continue;
+
+ // The first mention of a function argument gets the CurrentFnBegin
+ // label, so arguments are visible when breaking at function entry.
+ const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
+ if (DIVar->isParameter() &&
+ getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
+ LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
+ if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
+ // Mark all non-overlapping initial pieces.
+ for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
+ const DIExpression *Piece = I->first->getDebugExpression();
+ if (std::all_of(Ranges.begin(), I,
+ [&](DbgValueHistoryMap::InstrRange Pred) {
+ return !piecesOverlap(Piece, Pred.first->getDebugExpression());
+ }))
+ LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
+ else
+ break;
+ }
+ }
+ }
+
+ for (const auto &Range : Ranges) {
+ requestLabelBeforeInsn(Range.first);
+ if (Range.second)
+ requestLabelAfterInsn(Range.second);
+ }
+ }
+
+ PrevInstLoc = DebugLoc();
+ PrevLabel = Asm->getFunctionBegin();
+}
+
+void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
+ if (!MMI->hasDebugInfo())
+ return;
+
+ assert(CurMI == nullptr);
+ CurMI = MI;
+
+ // Insert labels where requested.
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsBeforeInsn.find(MI);
+
+ // No label needed.
+ if (I == LabelsBeforeInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+void DebugHandlerBase::endInstruction() {
+ if (!MMI->hasDebugInfo())
+ return;
+
+ assert(CurMI != nullptr);
+ // Don't create a new label after DBG_VALUE instructions.
+ // They don't generate code.
+ if (!CurMI->isDebugValue())
+ PrevLabel = nullptr;
+
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsAfterInsn.find(CurMI);
+ CurMI = nullptr;
+
+ // No label needed.
+ if (I == LabelsAfterInsn.end())
+ return;
+
+ // Label already assigned.
+ if (I->second)
+ return;
+
+ // We need a label after this instruction.
+ if (!PrevLabel) {
+ PrevLabel = MMI->getContext().createTempSymbol();
+ Asm->OutStreamer->EmitLabel(PrevLabel);
+ }
+ I->second = PrevLabel;
+}
+
+void DebugHandlerBase::endFunction(const MachineFunction *MF) {
+ DbgValues.clear();
+ LabelsBeforeInsn.clear();
+ LabelsAfterInsn.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
new file mode 100644
index 000000000000..b8bbcec133fd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -0,0 +1,109 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for different debug information format backends.
+// LLVM currently supports DWARF and CodeView.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H
+
+#include "AsmPrinterHandler.h"
+#include "DbgValueHistoryCalculator.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MachineModuleInfo;
+
+/// Base class for debug information backends. Common functionality related to
+/// tracking which variables and scopes are alive at a given PC lives here.
+class DebugHandlerBase : public AsmPrinterHandler {
+protected:
+ DebugHandlerBase(AsmPrinter *A);
+
+ /// Target of debug info emission.
+ AsmPrinter *Asm;
+
+ /// Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// Previous instruction's location information. This is used to
+ /// determine label locations to indicate scope boundaries in DWARF
+ /// debug info.
+ DebugLoc PrevInstLoc;
+ MCSymbol *PrevLabel = nullptr;
+
+ /// This location indicates end of function prologue and beginning of
+ /// function body.
+ DebugLoc PrologEndLoc;
+
+ /// If nonnull, stores the current machine instruction we're processing.
+ const MachineInstr *CurMI = nullptr;
+
+ LexicalScopes LScopes;
+
+ /// History of DBG_VALUE and clobber instructions for each user
+ /// variable. Variables are listed in order of appearance.
+ DbgValueHistoryMap DbgValues;
+
+ /// Maps an instruction to the label emitted before it.
+ /// FIXME: Make this private from DwarfDebug, we have the necessary accessors
+ /// for it.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+ /// Maps an instruction to the label emitted after it.
+ DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+ /// Identify instructions that mark the beginning or
+ /// end of a scope.
+ void identifyScopeMarkers();
+
+ /// Ensure that a label will be emitted before MI.
+ void requestLabelBeforeInsn(const MachineInstr *MI) {
+ LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
+ }
+
+ /// Ensure that a label will be emitted after MI.
+ void requestLabelAfterInsn(const MachineInstr *MI) {
+ LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
+ }
+
+ // AsmPrinterHandler overrides.
+public:
+ void beginInstruction(const MachineInstr *MI) override;
+ void endInstruction() override;
+
+ void beginFunction(const MachineFunction *MF) override;
+ void endFunction(const MachineFunction *MF) override;
+
+ /// Return Label preceding the instruction.
+ MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// Return Label immediately following the instruction.
+ MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+ /// Determine the relative position of the pieces described by P1 and P2.
+ /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
+ /// 1 if P1 is entirely after P2.
+ static int pieceCmp(const DIExpression *P1, const DIExpression *P2);
+
+ /// Determine whether two variable pieces overlap.
+ static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2);
+
+ /// If this type is derived from a base type then return base type size.
+ static uint64_t getBaseTypeSize(const DITypeRef TyRef);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
new file mode 100644
index 000000000000..20acd45e5720
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -0,0 +1,184 @@
+//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+
+#include "DebugLocStream.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+class AsmPrinter;
+
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
+class DebugLocEntry {
+ /// Begin and end symbols for the address range over which this location is valid.
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+
+public:
+ /// \brief A single location or constant.
+ struct Value {
+ Value(const DIExpression *Expr, int64_t i)
+ : Expression(Expr), EntryKind(E_Integer) {
+ Constant.Int = i;
+ }
+ Value(const DIExpression *Expr, const ConstantFP *CFP)
+ : Expression(Expr), EntryKind(E_ConstantFP) {
+ Constant.CFP = CFP;
+ }
+ Value(const DIExpression *Expr, const ConstantInt *CIP)
+ : Expression(Expr), EntryKind(E_ConstantInt) {
+ Constant.CIP = CIP;
+ }
+ Value(const DIExpression *Expr, MachineLocation Loc)
+ : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+ assert(cast<DIExpression>(Expr)->isValid());
+ }
+
+ /// Any complex address location expression for this Value.
+ const DIExpression *Expression;
+
+ /// Type of entry that this represents.
+ enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+ enum EntryType EntryKind;
+
+ /// Either a constant,
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constant;
+
+ // Or a location in the machine frame.
+ MachineLocation Loc;
+
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() const { return Constant.Int; }
+ const ConstantFP *getConstantFP() const { return Constant.CFP; }
+ const ConstantInt *getConstantInt() const { return Constant.CIP; }
+ MachineLocation getLoc() const { return Loc; }
+ bool isBitPiece() const { return getExpression()->isBitPiece(); }
+ const DIExpression *getExpression() const { return Expression; }
+ friend bool operator==(const Value &, const Value &);
+ friend bool operator<(const Value &, const Value &);
+ void dump() const {
+ if (isLocation()) {
+ llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
+ if (Loc.isIndirect())
+ llvm::dbgs() << '+' << Loc.getOffset();
+ llvm::dbgs() << "} ";
+ }
+ else if (isConstantInt())
+ Constant.CIP->dump();
+ else if (isConstantFP())
+ Constant.CFP->dump();
+ if (Expression)
+ Expression->dump();
+ }
+ };
+
+private:
+ /// A nonempty list of locations/constants belonging to this entry,
+ /// sorted by offset.
+ SmallVector<Value, 1> Values;
+
+public:
+ DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
+ : Begin(B), End(E) {
+ Values.push_back(std::move(Val));
+ }
+
+ /// \brief If this and Next are describing different pieces of the same
+ /// variable, merge them by appending Next's values to the current
+ /// list of values.
+ /// Return true if the merge was successful.
+ bool MergeValues(const DebugLocEntry &Next);
+
+ /// \brief Attempt to merge this DebugLocEntry with Next and return
+ /// true if the merge was successful. Entries can be merged if they
+ /// share the same Loc/Constant and if Next immediately follows this
+ /// Entry.
+ bool MergeRanges(const DebugLocEntry &Next) {
+ // If this and Next are describing the same variable, merge them.
+ if ((End == Next.Begin && Values == Next.Values)) {
+ End = Next.End;
+ return true;
+ }
+ return false;
+ }
+
+ const MCSymbol *getBeginSym() const { return Begin; }
+ const MCSymbol *getEndSym() const { return End; }
+ ArrayRef<Value> getValues() const { return Values; }
+ void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
+ Values.append(Vals.begin(), Vals.end());
+ sortUniqueValues();
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
+ return V.isBitPiece();
+ }) && "value must be a piece");
+ }
+
+ // \brief Sort the pieces by offset.
+ // Remove any duplicate entries by dropping all but the first.
+ void sortUniqueValues() {
+ std::sort(Values.begin(), Values.end());
+ Values.erase(
+ std::unique(
+ Values.begin(), Values.end(), [](const Value &A, const Value &B) {
+ return A.getExpression() == B.getExpression();
+ }),
+ Values.end());
+ }
+
+ /// \brief Lower this entry into a DWARF expression.
+ void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,
+ const DIBasicType *BT);
+};
+
+/// \brief Compare two Values for equality.
+inline bool operator==(const DebugLocEntry::Value &A,
+ const DebugLocEntry::Value &B) {
+ if (A.EntryKind != B.EntryKind)
+ return false;
+
+ if (A.Expression != B.Expression)
+ return false;
+
+ switch (A.EntryKind) {
+ case DebugLocEntry::Value::E_Location:
+ return A.Loc == B.Loc;
+ case DebugLocEntry::Value::E_Integer:
+ return A.Constant.Int == B.Constant.Int;
+ case DebugLocEntry::Value::E_ConstantFP:
+ return A.Constant.CFP == B.Constant.CFP;
+ case DebugLocEntry::Value::E_ConstantInt:
+ return A.Constant.CIP == B.Constant.CIP;
+ }
+ llvm_unreachable("unhandled EntryKind");
+}
+
+/// \brief Compare two pieces based on their offset.
+inline bool operator<(const DebugLocEntry::Value &A,
+ const DebugLocEntry::Value &B) {
+ return A.getExpression()->getBitPieceOffset() <
+ B.getExpression()->getBitPieceOffset();
+}
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
new file mode 100644
index 000000000000..7e8ed7104af3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -0,0 +1,46 @@
+//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DebugLocStream.h"
+#include "DwarfDebug.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+using namespace llvm;
+
+bool DebugLocStream::finalizeList(AsmPrinter &Asm) {
+ if (Lists.back().EntryOffset == Entries.size()) {
+ // Empty list. Delete it.
+ Lists.pop_back();
+ return false;
+ }
+
+ // Real list. Generate a label for it.
+ Lists.back().Label = Asm.createTempSymbol("debug_loc");
+ return true;
+}
+
+void DebugLocStream::finalizeEntry() {
+ if (Entries.back().ByteOffset != DWARFBytes.size())
+ return;
+
+ // The last entry was empty. Delete it.
+ Comments.erase(Comments.begin() + Entries.back().CommentOffset,
+ Comments.end());
+ Entries.pop_back();
+
+ assert(Lists.back().EntryOffset <= Entries.size() &&
+ "Popped off more entries than are in the list");
+}
+
+DebugLocStream::ListBuilder::~ListBuilder() {
+ if (!Locs.finalizeList(Asm))
+ return;
+ V.initializeDbgValue(&MI);
+ V.setDebugLocListIndex(ListIndex);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
new file mode 100644
index 000000000000..3656e9d95099
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -0,0 +1,193 @@
+//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "ByteStreamer.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class DbgVariable;
+class DwarfCompileUnit;
+class MachineInstr;
+class MCSymbol;
+
+/// \brief Byte stream of .debug_loc entries.
+///
+/// Stores a unified stream of .debug_loc entries. There's a \a List for each
+/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry.
+///
+/// FIXME: Do we need all these temp symbols?
+/// FIXME: Why not output directly to the output stream?
+class DebugLocStream {
+public:
+ struct List {
+ DwarfCompileUnit *CU;
+ MCSymbol *Label = nullptr;
+ size_t EntryOffset;
+ List(DwarfCompileUnit *CU, size_t EntryOffset)
+ : CU(CU), EntryOffset(EntryOffset) {}
+ };
+ struct Entry {
+ const MCSymbol *BeginSym;
+ const MCSymbol *EndSym;
+ size_t ByteOffset;
+ size_t CommentOffset;
+ Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset,
+ size_t CommentOffset)
+ : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset),
+ CommentOffset(CommentOffset) {}
+ };
+
+private:
+ SmallVector<List, 4> Lists;
+ SmallVector<Entry, 32> Entries;
+ SmallString<256> DWARFBytes;
+ SmallVector<std::string, 32> Comments;
+
+ /// \brief Only verbose textual output needs comments. This will be set to
+ /// true for that case, and false otherwise.
+ bool GenerateComments;
+
+public:
+ DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) { }
+ size_t getNumLists() const { return Lists.size(); }
+ const List &getList(size_t LI) const { return Lists[LI]; }
+ ArrayRef<List> getLists() const { return Lists; }
+
+ class ListBuilder;
+ class EntryBuilder;
+
+private:
+ /// \brief Start a new .debug_loc entry list.
+ ///
+ /// Start a new .debug_loc entry list. Return the new list's index so it can
+ /// be retrieved later via \a getList().
+ ///
+ /// Until the next call, \a startEntry() will add entries to this list.
+ size_t startList(DwarfCompileUnit *CU) {
+ size_t LI = Lists.size();
+ Lists.emplace_back(CU, Entries.size());
+ return LI;
+ }
+
+ /// Finalize a .debug_loc entry list.
+ ///
+ /// If there are no entries in this list, delete it outright. Otherwise,
+ /// create a label with \a Asm.
+ ///
+ /// \return false iff the list is deleted.
+ bool finalizeList(AsmPrinter &Asm);
+
+ /// \brief Start a new .debug_loc entry.
+ ///
+ /// Until the next call, bytes added to the stream will be added to this
+ /// entry.
+ void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) {
+ Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size());
+ }
+
+ /// Finalize a .debug_loc entry, deleting if it's empty.
+ void finalizeEntry();
+
+public:
+ BufferByteStreamer getStreamer() {
+ return BufferByteStreamer(DWARFBytes, Comments, GenerateComments);
+ }
+
+ ArrayRef<Entry> getEntries(const List &L) const {
+ size_t LI = getIndex(L);
+ return makeArrayRef(Entries)
+ .slice(Lists[LI].EntryOffset, getNumEntries(LI));
+ }
+
+ ArrayRef<char> getBytes(const Entry &E) const {
+ size_t EI = getIndex(E);
+ return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end())
+ .slice(Entries[EI].ByteOffset, getNumBytes(EI));
+ }
+ ArrayRef<std::string> getComments(const Entry &E) const {
+ size_t EI = getIndex(E);
+ return makeArrayRef(Comments)
+ .slice(Entries[EI].CommentOffset, getNumComments(EI));
+ }
+
+private:
+ size_t getIndex(const List &L) const {
+ assert(&Lists.front() <= &L && &L <= &Lists.back() &&
+ "Expected valid list");
+ return &L - &Lists.front();
+ }
+ size_t getIndex(const Entry &E) const {
+ assert(&Entries.front() <= &E && &E <= &Entries.back() &&
+ "Expected valid entry");
+ return &E - &Entries.front();
+ }
+ size_t getNumEntries(size_t LI) const {
+ if (LI + 1 == Lists.size())
+ return Entries.size() - Lists[LI].EntryOffset;
+ return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset;
+ }
+ size_t getNumBytes(size_t EI) const {
+ if (EI + 1 == Entries.size())
+ return DWARFBytes.size() - Entries[EI].ByteOffset;
+ return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset;
+ }
+ size_t getNumComments(size_t EI) const {
+ if (EI + 1 == Entries.size())
+ return Comments.size() - Entries[EI].CommentOffset;
+ return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset;
+ }
+};
+
+/// Builder for DebugLocStream lists.
+class DebugLocStream::ListBuilder {
+ DebugLocStream &Locs;
+ AsmPrinter &Asm;
+ DbgVariable &V;
+ const MachineInstr &MI;
+ size_t ListIndex;
+
+public:
+ ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
+ DbgVariable &V, const MachineInstr &MI)
+ : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {}
+
+ /// Finalize the list.
+ ///
+ /// If the list is empty, delete it. Otherwise, finalize it by creating a
+ /// temp symbol in \a Asm and setting up the \a DbgVariable.
+ ~ListBuilder();
+
+ DebugLocStream &getLocs() { return Locs; }
+};
+
+/// Builder for DebugLocStream entries.
+class DebugLocStream::EntryBuilder {
+ DebugLocStream &Locs;
+
+public:
+ EntryBuilder(ListBuilder &List, const MCSymbol *Begin, const MCSymbol *End)
+ : Locs(List.getLocs()) {
+ Locs.startEntry(Begin, End);
+ }
+
+ /// Finalize the entry, deleting it if it's empty.
+ ~EntryBuilder() { Locs.finalizeEntry(); }
+
+ BufferByteStreamer getStreamer() { return Locs.getStreamer(); }
+};
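+
+// Hypothetical usage sketch (names assumed): the two builders are intended to
+// be nested RAII scopes, e.g.
+//
+//   DebugLocStream::ListBuilder List(Locs, CU, Asm, Var, FirstMI);
+//   for (const auto &Range : Ranges) {
+//     DebugLocStream::EntryBuilder Entry(List, Range.Begin, Range.End);
+//     emitLocationBytes(Entry.getStreamer());   // hypothetical helper
+//   }
+//   // ~EntryBuilder drops empty entries; ~ListBuilder drops an empty list.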
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 000000000000..4ad3e1867328
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,289 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
+ : Header(8 + (atomList.size() * 4)), HeaderData(atomList),
+ Entries(Allocator) {}
+
+void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
+ char Flags) {
+ assert(Data.empty() && "Already finalized!");
+ // If the string is already in the list, add this die to its entry;
+ // otherwise, add a new one.
+ DataArray &DIEs = Entries[Name.getString()];
+ assert(!DIEs.Name || DIEs.Name == Name);
+ DIEs.Name = Name;
+ DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount() {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques(Data.size());
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ uniques[i] = Data[i]->HashValue;
+ array_pod_sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+ // Then compute the bucket count, with a minimum of 1 bucket.
+ if (num > 1024)
+ Header.bucket_count = num / 4;
+ else if (num > 16)
+ Header.bucket_count = num / 2;
+ else
+ Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
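+
+// Worked example (counts assumed): with 300 unique hashes, 16 < 300 <= 1024,
+// so bucket_count becomes 150; with 5000 unique hashes it becomes 1250; with
+// no names at all the table still gets a single (empty) bucket.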
+
+// compareDIEs - comparison predicate that sorts DIEs by their offset.
+static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
+ const DwarfAccelTable::HashDataContents *B) {
+ return A->Die->getOffset() < B->Die->getOffset();
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
+ // Create the individual hash data outputs.
+ Data.reserve(Entries.size());
+ for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
+ EI != EE; ++EI) {
+
+ // Unique the entries.
+ std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs);
+ EI->second.Values.erase(
+ std::unique(EI->second.Values.begin(), EI->second.Values.end()),
+ EI->second.Values.end());
+
+ HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+ // symbols to the data so that we can reference them when emitting the
+ // offsets later; the symbols themselves are emitted with the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0, e = Data.size(); i < e; ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->createTempSymbol(Prefix);
+ }
+
+ // Sort the contents of the buckets by hash value so that hash
+ // collisions end up together. Stable sort makes testing easier and
+ // doesn't cost much more.
+ for (size_t i = 0; i < Buckets.size(); ++i)
+ std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+ [] (HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer->AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer->AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer->AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer->AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer->AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer->AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer->AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer->AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. Each index is
+// an offset into the list of hashes.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer->AddComment("Bucket " + Twine(i));
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ // Buckets point into the list of hashes, not to the data. Do not
+ // increment the index multiple times in case of hash collisions.
+ uint64_t PrevHash = UINT64_MAX;
+ for (auto *HD : Buckets[i]) {
+ uint32_t HashValue = HD->HashValue;
+ if (PrevHash != HashValue)
+ ++index;
+ PrevHash = HashValue;
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end();
+ HI != HE; ++HI) {
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
+ Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32(HashValue);
+ PrevHash = HashValue;
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end();
+ HI != HE; ++HI) {
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
+ PrevHash = HashValue;
+ Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer->getContext();
+ const MCExpr *Sub = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create((*HI)->Sym, Context),
+ MCSymbolRefExpr::create(SecBegin, Context), Context);
+ Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t));
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the DIEs and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end();
+ HI != HE; ++HI) {
+ // Terminate the previous entry if there is no hash collision
+ // with the current one.
+ if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer->EmitLabel((*HI)->Sym);
+ Asm->OutStreamer->AddComment((*HI)->Str);
+ Asm->emitDwarfStringOffset((*HI)->Data.Name);
+ Asm->OutStreamer->AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->Data.Values.size());
+ for (HashDataContents *HD : (*HI)->Data.Values) {
+ // Emit the DIE offset
+ DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit());
+ assert(CU && "Accelerated DIE should belong to a CU.");
+ Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset());
+ // If we have multiple Atoms emit that info too.
+ // FIXME: A bit of a hack, we either emit only one atom or all info.
+ if (HeaderData.Atoms.size() > 1) {
+ Asm->EmitInt16(HD->Die->getTag());
+ Asm->EmitInt8(HD->Flags);
+ }
+ }
+ PrevHash = (*HI)->HashValue;
+ }
+ // Emit the final end marker for the bucket.
+ if (!Buckets[i].empty())
+ Asm->EmitInt32(0);
+ }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
+ DwarfDebug *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ emitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DataArray>::const_iterator EI = Entries.begin(),
+ EE = Entries.end();
+ EI != EE; ++EI) {
+ O << "Name: " << EI->getKeyData() << "\n";
+ for (HashDataContents *HD : EI->second.Values)
+ HD->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end();
+ HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData *>::const_iterator DI = Data.begin(),
+ DE = Data.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 000000000000..4d81441f6a72
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,256 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include <vector>
+
+// The dwarf accelerator tables are indirect hash tables optimized
+// for null lookup rather than for access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, a version, the type of hash
+// function, the number of buckets, the total number of hashes, and the length
+// of an implementation-specific header-data struct that follows the header.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
+
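The lookup procedure described above, restated as a minimal C++ sketch (not part of the patch; the struct and field names are invented for the example, and the four arrays are assumed to have already been parsed out of the section):

    #include <cstdint>
    #include <vector>

    struct ParsedTable {
      uint32_t bucket_count = 0;
      std::vector<uint32_t> buckets;  // per-bucket index into `hashes`, UINT32_MAX if empty
      std::vector<uint32_t> hashes;   // all 32-bit hash values, grouped by bucket
      std::vector<uint32_t> offsets;  // parallel to `hashes`: offset of the HashData block
    };

    // Returns the data-area offset for `hash`, or UINT32_MAX if the name is absent.
    uint32_t lookupHash(const ParsedTable &T, uint32_t hash) {
      uint32_t bucket = hash % T.bucket_count;
      uint32_t idx = T.buckets[bucket];
      if (idx == UINT32_MAX)
        return UINT32_MAX;                       // empty bucket: definitely absent
      // Scan successive hashes while they still belong to this bucket.
      while (idx < T.hashes.size() && T.hashes[idx] % T.bucket_count == bucket) {
        if (T.hashes[idx] == hash)
          return T.offsets[idx];                 // caller then walks the HashData entries
        ++idx;
      }
      return UINT32_MAX;
    }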
+namespace llvm {
+
+class AsmPrinter;
+class DwarfDebug;
+
+class DwarfAccelTable {
+
+ static uint32_t HashDJB(StringRef Str) {
+ uint32_t h = 5381;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ h = ((h << 5) + h) + Str[i];
+ return h;
+ }
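
This is the Bernstein (djb2) string hash: h starts at 5381 and is updated as h = h*33 + c for each byte. A tiny standalone check of one value (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t h = 5381;
      h = ((h << 5) + h) + 'a'; // 5381 * 33 + 97
      assert(h == 177670u);     // HashDJB("a")
      return 0;
    }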
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount(void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader(uint32_t data_len)
+ : magic(MagicHash), version(1),
+ hash_function(dwarf::DW_hash_function_djb), bucket_count(0),
+ hashes_count(0), header_data_len(data_len) {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+ // is a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ uint16_t type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form)
+ : type(type), form(form) {}
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << dwarf::AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+private:
+ struct TableHeaderData {
+ uint32_t die_offset_base;
+ SmallVector<Atom, 3> Atoms;
+
+ TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
+ : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+ // The data itself consists of a str_offset, a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+ // On disk each data section is ended with a 0 KeyType as the end of the
+ // hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+public:
+ struct HashDataContents {
+ const DIE *Die; // Offsets
+ char Flags; // Specific flags to output
+
+ HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
+#ifndef NDEBUG
+ void print(raw_ostream &O) const {
+ O << " Offset: " << Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
+ O << " Flags: " << Flags << "\n";
+ }
+#endif
+ };
+
+private:
+ // String Data
+ struct DataArray {
+ DwarfStringPoolEntryRef Name;
+ std::vector<HashDataContents *> Values;
+ };
+ friend struct HashData;
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ DwarfAccelTable::DataArray &Data; // offsets
+ HashData(StringRef S, DwarfAccelTable::DataArray &Data)
+ : Str(S), Data(Data) {
+ HashValue = DwarfAccelTable::HashDJB(S);
+ }
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: ";
+ if (Sym)
+ O << *Sym;
+ else
+ O << "<none>";
+ O << "\n";
+ for (HashDataContents *C : Data.Values) {
+ O << " Offset: " << C->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
+ O << " Flags: " << C->Flags << "\n";
+ }
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable &) = delete;
+ void operator=(const DwarfAccelTable &) = delete;
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void emitOffsets(AsmPrinter *, const MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
+
+ // Allocator for HashData and HashDataContents.
+ BumpPtrAllocator Allocator;
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData *> Data;
+
+ typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData *> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+public:
+ DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
+ void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0);
+ void FinalizeTable(AsmPrinter *, StringRef);
+ void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+}
+#endif
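
Taken together, the public interface above (constructor with an Atom list, AddName, FinalizeTable, emit) is driven roughly as follows. This is a hedged sketch, not part of the patch: it assumes an AsmPrinter, a DwarfDebug, the target accelerator MCSection, a string-pool entry and an accelerated DIE are already available, which approximates how DwarfDebug's accelerator-emission path uses the class.

    #include "DwarfAccelTable.h"
    #include "llvm/CodeGen/AsmPrinter.h"
    #include "llvm/MC/MCSection.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    static void emitNamesTable(AsmPrinter *Asm, DwarfDebug *DD, MCSection *Sec,
                               DwarfStringPoolEntryRef NameEntry, const DIE &Die) {
      // One atom per hash-data entry: a 4-byte DIE offset.
      DwarfAccelTable Names(
          DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));

      // Register each name that should be findable, with the DIE it refers to.
      Names.AddName(NameEntry, &Die);

      // Bucketize, sort by hash value, and create per-entry temp symbols.
      Names.FinalizeTable(Asm, "Names");

      // Emit header, buckets, hashes, offsets and data into the section.
      Asm->OutStreamer->SwitchSection(Sec);
      Names.emit(Asm, Sec->getBeginSymbol(), DD);
    }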
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..2eae1b234473
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,179 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
+ : EHStreamer(A), shouldEmitCFI(false) {}
+
+void DwarfCFIExceptionBase::markFunctionEnd() {
+ endFragment();
+
+ if (MMI->getLandingPads().empty())
+ return;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+}
+
+void DwarfCFIExceptionBase::endFragment() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer->EmitCFIEndProc();
+}
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
+ forceEmitPersonality(false), shouldEmitLSDA(false),
+ shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// endModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::endModule() {
+ // SjLj uses this pass and it doesn't need this info.
+ if (!Asm->MAI->usesCFIForEH())
+ return;
+
+ if (moveTypeModule == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer->EmitCFISections(false, true);
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
+ return;
+
+ // Emit references to all used personality functions
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (!Personality)
+ continue;
+ MCSymbol *Sym = Asm->getSymbol(Personality);
+ TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
+ }
+}
+
+static MCSymbol *getExceptionSym(AsmPrinter *Asm) {
+ return Asm->getCurExceptionSym();
+}
+
+void DwarfCFIException::beginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+ const Function *F = MF->getFunction();
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+ if (MoveType == AsmPrinter::CFI_M_EH ||
+ (MoveType == AsmPrinter::CFI_M_Debug &&
+ moveTypeModule == AsmPrinter::CFI_M_None))
+ moveTypeModule = MoveType;
+
+ shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = nullptr;
+ if (F->hasPersonalityFn())
+ Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+
+ // Emit a personality function even when there are no landing pads
+ forceEmitPersonality =
+ // ...if a personality function is explicitly specified
+ F->hasPersonalityFn() &&
+ // ... and it's not known to be a noop in the absence of invokes
+ !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ // ... and we're not explicitly asked not to emit it
+ F->needsUnwindTableEntry();
+
+ shouldEmitPersonality =
+ (forceEmitPersonality ||
+ (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) &&
+ Per;
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ shouldEmitCFI = MF->getMMI().getContext().getAsmInfo()->usesCFIForEH() &&
+ (shouldEmitPersonality || shouldEmitMoves);
+ beginFragment(&*MF->begin(), getExceptionSym);
+}
+
+void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
+ ExceptionSymbolProvider ESP) {
+ if (!shouldEmitCFI)
+ return;
+
+ Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false);
+
+ // Indicate personality routine, if any.
+ if (!shouldEmitPersonality)
+ return;
+
+ auto *F = MBB->getParent()->getFunction();
+ auto *P = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ assert(P && "Expected personality function");
+
+ // If we are forced to emit this personality, make sure to record
+ // it because it might not appear in any landingpad
+ if (forceEmitPersonality)
+ MMI->addPersonality(P);
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const MCSymbol *Sym =
+ TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI);
+ Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
+
+ // Provide LSDA information.
+ if (shouldEmitLSDA)
+ Asm->OutStreamer->EmitCFILsda(ESP(Asm), TLOF.getLSDAEncoding());
+}
+
+/// endFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::endFunction(const MachineFunction *) {
+ if (!shouldEmitPersonality)
+ return;
+
+ emitExceptionTable();
+}
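
For orientation (not part of the patch): on a CFI-based ELF target, the streamer calls in beginFragment/endFragment above correspond to textual assembler output along these lines when both shouldEmitPersonality and shouldEmitLSDA are set. The encodings and symbol names are illustrative; the real values come from TargetLoweringObjectFile (0x9b is the usual indirect+pcrel+sdata4 personality encoding and 0x1b the pcrel+sdata4 LSDA encoding on x86-64 Linux).

    _Z3foov:
            .cfi_startproc                                       # beginFragment
            .cfi_personality 0x9b, DW.ref.__gxx_personality_v0   # EmitCFIPersonality
            .cfi_lsda 0x1b, .Lexception0                         # EmitCFILsda
            ...                                                  # function body
            .cfi_endproc                                         # endFragment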
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
new file mode 100644
index 000000000000..7822814c7a0f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -0,0 +1,833 @@
+#include "DwarfCompileUnit.h"
+#include "DwarfExpression.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+namespace llvm {
+
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
+ AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU)
+ : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID),
+ Skeleton(nullptr), BaseAddress(nullptr) {
+ insertDIE(Node, &getUnitDie());
+ MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
+}
+
+/// addLabelAddress - Add a dwarf label attribute data and value using
+/// DW_FORM_addr or DW_FORM_GNU_addr_index.
+///
+void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+
+ // Don't use the address pool in non-fission or in the skeleton unit itself.
+ // FIXME: Once GDB supports this, it's probably worthwhile using the address
+ // pool from the skeleton - maybe even in non-fission (possibly fewer
+ // relocations by sharing them in the pool, but we have other ideas about how
+ // to reduce the number of relocations as well/instead).
+ if (!DD->useSplitDwarf() || !Skeleton)
+ return addLocalLabelAddress(Die, Attribute, Label);
+
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ unsigned idx = DD->getAddressPool().getIndex(Label);
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index,
+ DIEInteger(idx));
+}
+
+void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
+ dwarf::Attribute Attribute,
+ const MCSymbol *Label) {
+ if (Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
+ if (Label)
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+ DIELabel(Label));
+ else
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr,
+ DIEInteger(0));
+}
+
+unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If we print assembly, we can't separate .file entries according to
+ // compile units. Thus all files will belong to the default compile unit.
+
+ // FIXME: add a better feature test than hasRawTextSupport. Even better,
+ // extend .file to support this.
+ return Asm->OutStreamer->EmitDwarfFileDirective(
+ 0, DirName, FileName,
+ Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return nullptr;
+
+ // First operand points to a global struct.
+ Value *Ptr = CE->getOperand(0);
+ GlobalValue *GV = dyn_cast<GlobalValue>(Ptr);
+ if (!GV || !isa<StructType>(GV->getValueType()))
+ return nullptr;
+
+ // Second operand is zero.
+ const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return nullptr;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return nullptr;
+
+ return CE;
+}
+
+/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
+DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
+ const DIGlobalVariable *GV) {
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(GV))
+ return Die;
+
+ assert(GV);
+
+ auto *GVContext = GV->getScope();
+ auto *GTy = DD->resolve(GV->getType());
+
+ // Construct the context before querying for the existence of the DIE in
+ // case such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+
+ // Add to map.
+ DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
+ DIScope *DeclContext;
+ if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
+ DeclContext = resolve(SDMDecl->getScope());
+ assert(SDMDecl->isStaticMember() && "Expected static member decl");
+ assert(GV->isDefinition());
+ // We need the declaration DIE that is in the static member's class.
+ DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
+ addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
+ } else {
+ DeclContext = GV->getScope();
+ // Add name and type.
+ addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
+ addType(*VariableDIE, GTy);
+
+ // Add scoping info.
+ if (!GV->isLocalToUnit())
+ addFlag(*VariableDIE, dwarf::DW_AT_external);
+
+ // Add line number info.
+ addSourceLine(*VariableDIE, GV);
+ }
+
+ if (!GV->isDefinition())
+ addFlag(*VariableDIE, dwarf::DW_AT_declaration);
+ else
+ addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+
+ // Add location.
+ bool addToAccelTable = false;
+ if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
+ // We cannot describe the location of dllimport'd variables: the computation
+ // of their address requires loads from the IAT.
+ if (!Global->hasDLLImportStorageClass()) {
+ addToAccelTable = true;
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
+ } else {
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a const4u/const8u of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
+ ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
+ }
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ }
+
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ if (DD->useAllLinkageNames())
+ addLinkageName(*VariableDIE, GV->getLinkageName());
+ }
+ } else if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV->getVariable())) {
+ addConstantValue(*VariableDIE, CI, GTy);
+ } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
+ auto *Ptr = cast<GlobalValue>(CE->getOperand(0));
+ if (!Ptr->hasDLLImportStorageClass()) {
+ addToAccelTable = true;
+ // GV is a merged global.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ MCSymbol *Sym = Asm->getSymbol(Ptr);
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(),
+ Idx));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ }
+ }
+
+ if (addToAccelTable) {
+ DD->addAccelName(GV->getName(), *VariableDIE);
+
+ // If the linkage name is different from the name, go ahead and output
+ // that as well into the name table.
+ if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName())
+ DD->addAccelName(GV->getLinkageName(), *VariableDIE);
+ }
+
+ return VariableDIE;
+}
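
The three numbered TLS steps above produce one of the following location expressions (a schematic restatement, not additional patch content; which form is chosen depends on useSplitDwarf() and useGNUTLSOpcode() exactly as in the code):

    non-split DWARF:  DW_OP_const4u/DW_OP_const8u <TLS offset expr>  DW_OP_form_tls_address
    split DWARF:      DW_OP_GNU_const_index <address-pool index>     DW_OP_form_tls_address
    (DW_OP_GNU_push_tls_address replaces DW_OP_form_tls_address when useGNUTLSOpcode() is set)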
+
+void DwarfCompileUnit::addRange(RangeSpan Range) {
+ bool SameAsPrevCU = this == DD->getPrevCU();
+ DD->setPrevCU(this);
+ // If we have no current ranges, just add the range and return. Otherwise,
+ // check the current section and CU against the previous section and CU we
+ // emitted into (and that the subprogram was contained within). If these are
+ // the same, extend our current range; otherwise add this as a new range.
+ if (CURanges.empty() || !SameAsPrevCU ||
+ (&CURanges.back().getEnd()->getSection() !=
+ &Range.getEnd()->getSection())) {
+ CURanges.push_back(Range);
+ return;
+ }
+
+ CURanges.back().setEnd(Range.getEnd());
+}
+
+DIE::value_iterator
+DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label, const MCSymbol *Sec) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return addLabel(Die, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Label);
+ return addSectionDelta(Die, Attribute, Label, Sec);
+}
+
+void DwarfCompileUnit::initStmtList() {
+ // Define start line table label for each Compile Unit.
+ MCSymbol *LineTableStartSym =
+ Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID());
+
+ // DW_AT_stmt_list is an offset to the line number information for this
+ // compile unit in the debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
+ // The line table entries are not always emitted in assembly, so it
+ // is not okay to use line_table_start here.
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ StmtListValue =
+ addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ TLOF.getDwarfLineSection()->getBeginSymbol());
+}
+
+void DwarfCompileUnit::applyStmtList(DIE &D) {
+ D.addValue(DIEValueAllocator, *StmtListValue);
+}
+
+void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
+ const MCSymbol *End) {
+ assert(Begin && "Begin label should not be null!");
+ assert(End && "End label should not be null!");
+ assert(Begin->isDefined() && "Invalid starting label");
+ assert(End->isDefined() && "Invalid end label");
+
+ addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
+ if (DD->getDwarfVersion() < 4)
+ addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+ else
+ addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
+ DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
+
+ attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
+ if (DD->useAppleExtensionAttributes() &&
+ !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
+ *DD->getCurrentFunction()))
+ addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
+
+ // Only include DW_AT_frame_base in full debug info
+ if (!includeMinimalInlineScopes()) {
+ const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
+ MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+ if (RI->isPhysicalRegister(Location.getReg()))
+ addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
+ }
+
+ // Add the name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ DD->addSubprogramNames(SP, *SPDie);
+
+ return *SPDie;
+}
+
+// Construct a DIE for this scope.
+void DwarfCompileUnit::constructScopeDIE(
+ LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
+ if (!Scope || !Scope->getScopeNode())
+ return;
+
+ auto *DS = Scope->getScopeNode();
+
+ assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) &&
+ "Only handle inlined subprograms here, use "
+ "constructSubprogramScopeDIE for non-inlined "
+ "subprograms");
+
+ SmallVector<DIE *, 8> Children;
+
+ // We try to create the scope DIE first, then the children DIEs. This will
+ // avoid creating un-used children then removing them later when we find out
+ // the scope DIE is null.
+ DIE *ScopeDIE;
+ if (Scope->getParent() && isa<DISubprogram>(DS)) {
+ ScopeDIE = constructInlinedScopeDIE(Scope);
+ if (!ScopeDIE)
+ return;
+ // We create children when the scope DIE is not null.
+ createScopeChildrenDIE(Scope, Children);
+ } else {
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ unsigned ChildScopeCount;
+
+ // We create children here when we know the scope DIE is not going to be
+ // null and the children will be added to the scope DIE.
+ createScopeChildrenDIE(Scope, Children, &ChildScopeCount);
+
+ // Skip imported directives in gmlt-like data.
+ if (!includeMinimalInlineScopes()) {
+ // There is no need to emit empty lexical block DIE.
+ for (const auto *IE : ImportedEntities[DS])
+ Children.push_back(
+ constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
+ }
+
+ // If there are only other scopes as children, put them directly in the
+ // parent instead, as this scope would serve no purpose.
+ if (Children.size() == ChildScopeCount) {
+ FinalChildren.insert(FinalChildren.end(),
+ std::make_move_iterator(Children.begin()),
+ std::make_move_iterator(Children.end()));
+ return;
+ }
+ ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
+ }
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE->addChild(std::move(I));
+
+ FinalChildren.push_back(std::move(ScopeDIE));
+}
+
+DIE::value_iterator
+DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ return Die.addValue(DIEValueAllocator, Attribute,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
+ SmallVector<RangeSpan, 2> Range) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Emit the offset into .debug_ranges as a relocatable label. emitDIE will handle
+ // emitting it appropriately.
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
+
+ RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
+
+ // Under fission, ranges are specified by constant offsets relative to the
+ // CU's DW_AT_GNU_ranges_base.
+ if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+
+ // Add the range list to the set of ranges to be emitted.
+ (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
+ if (Ranges.size() == 1) {
+ const auto &single = Ranges.front();
+ attachLowHighPC(Die, single.getStart(), single.getEnd());
+ } else
+ addScopeRangeList(Die, std::move(Ranges));
+}
+
+void DwarfCompileUnit::attachRangesOrLowHighPC(
+ DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) {
+ SmallVector<RangeSpan, 2> List;
+ List.reserve(Ranges.size());
+ for (const InsnRange &R : Ranges)
+ List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first),
+ DD->getLabelAfterInsn(R.second)));
+ attachRangesOrLowHighPC(Die, std::move(List));
+}
+
+// This scope represents inlined body of a function. Construct DIE to
+// represent this concrete inlined copy of the function.
+DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
+ assert(Scope->getScopeNode());
+ auto *DS = Scope->getScopeNode();
+ auto *InlinedSP = getDISubprogram(DS);
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
+ assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
+
+ auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
+ addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ // Add the call site information to the DIE.
+ const DILocation *IA = Scope->getInlinedAt();
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
+ getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+ if (IA->getDiscriminator())
+ addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
+ IA->getDiscriminator());
+
+ // Add the name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subroutine nodes.
+ DD->addSubprogramNames(InlinedSP, *ScopeDIE);
+
+ return ScopeDIE;
+}
+
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
+ if (DD->isLexicalScopeDIENull(Scope))
+ return nullptr;
+
+ auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
+
+ attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
+
+ return ScopeDIE;
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) {
+ auto D = constructVariableDIEImpl(DV, Abstract);
+ DV.setDIE(*D);
+ return D;
+}
+
+DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract) {
+ // Define variable debug information entry.
+ auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag());
+
+ if (Abstract) {
+ applyVariableAttributes(DV, *VariableDie);
+ return VariableDie;
+ }
+
+ // Add variable address.
+
+ unsigned Offset = DV.getDebugLocListIndex();
+ if (Offset != ~0U) {
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ if (const MachineInstr *DVInsn = DV.getMInsn()) {
+ assert(DVInsn->getNumOperands() == 4);
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ // If the second operand is an immediate, this is an indirect value.
+ if (DVInsn->getOperand(1).isImm()) {
+ MachineLocation Location(RegOp.getReg(),
+ DVInsn->getOperand(1).getImm());
+ addVariableAddress(DV, *VariableDie, Location);
+ } else if (RegOp.getReg())
+ addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
+ } else if (DVInsn->getOperand(0).isImm()) {
+ // This variable is described by a single constant.
+ // Check whether it has a DIExpression.
+ auto *Expr = DV.getSingleExpression();
+ if (Expr && Expr->getNumElements()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ // If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
+ addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+ } else
+ addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
+ } else if (DVInsn->getOperand(0).isFPImm())
+ addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
+ DV.getType());
+
+ return VariableDie;
+ }
+
+ // ... otherwise, fall back to the frame index.
+ if (DV.getFrameIndex().empty())
+ return VariableDie;
+
+ auto Expr = DV.getExpression().begin();
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ for (auto FI : DV.getFrameIndex()) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
+ DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ FrameReg, Offset);
+ DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
+ ++Expr;
+ }
+ addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+
+ return VariableDie;
+}
+
+DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV,
+ const LexicalScope &Scope,
+ DIE *&ObjectPointer) {
+ auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
+ if (DV.isObjectPointer())
+ ObjectPointer = Var;
+ return Var;
+}
+
+DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
+ SmallVectorImpl<DIE *> &Children,
+ unsigned *ChildScopeCount) {
+ DIE *ObjectPointer = nullptr;
+
+ for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
+ Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ unsigned ChildCountWithoutScopes = Children.size();
+
+ for (LexicalScope *LS : Scope->getChildren())
+ constructScopeDIE(LS, Children);
+
+ if (ChildScopeCount)
+ *ChildScopeCount = Children.size() - ChildCountWithoutScopes;
+
+ return ObjectPointer;
+}
+
+void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ auto *Sub = cast<DISubprogram>(Scope->getScopeNode());
+
+ DD->getProcessedSPNodes().insert(Sub);
+
+ DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+
+ // If this is a variadic function, add an unspecified parameter.
+ DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
+
+ // Collect lexical scope children first.
+ // ObjectPointer might be a local (non-argument) variable if it's a
+ // block's synthetic this pointer.
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+ // If we have a single element of null, it is a function that returns void.
+ // If we have more than one element and the last one is null, it is a
+ // variadic function.
+ if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] &&
+ !includeMinimalInlineScopes())
+ ScopeDIE.addChild(
+ DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters));
+}
+
+DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
+ DIE &ScopeDIE) {
+ // We create children when the scope DIE is not null.
+ SmallVector<DIE *, 8> Children;
+ DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE.addChild(std::move(I));
+
+ return ObjectPointer;
+}
+
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+ LexicalScope *Scope) {
+ DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
+ if (AbsDef)
+ return;
+
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
+
+ DIE *ContextDIE;
+
+ if (includeMinimalInlineScopes())
+ ContextDIE = &getUnitDie();
+ // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
+ // the important distinction that the debug node is not associated with the
+ // DIE (since the debug node will be associated with the concrete DIE, if
+ // any). It could be refactored to some common utility function.
+ else if (auto *SPDecl = SP->getDeclaration()) {
+ ContextDIE = &getUnitDie();
+ getOrCreateSubprogramDIE(SPDecl);
+ } else
+ ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));
+
+ // Passing null as the associated node because the abstract definition
+ // shouldn't be found by lookup.
+ AbsDef = &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
+ applySubprogramAttributesToDefinition(SP, *AbsDef);
+
+ if (!includeMinimalInlineScopes())
+ addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef))
+ addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+}
+
+DIE *DwarfCompileUnit::constructImportedEntityDIE(
+ const DIImportedEntity *Module) {
+ DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
+ insertDIE(Module, IMDie);
+ DIE *EntityDie;
+ auto *Entity = resolve(Module->getEntity());
+ if (auto *NS = dyn_cast<DINamespace>(Entity))
+ EntityDie = getOrCreateNameSpace(NS);
+ else if (auto *M = dyn_cast<DIModule>(Entity))
+ EntityDie = getOrCreateModule(M);
+ else if (auto *SP = dyn_cast<DISubprogram>(Entity))
+ EntityDie = getOrCreateSubprogramDIE(SP);
+ else if (auto *T = dyn_cast<DIType>(Entity))
+ EntityDie = getOrCreateTypeDIE(T);
+ else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
+ EntityDie = getOrCreateGlobalVariableDIE(GV);
+ else
+ EntityDie = getDIE(Entity);
+ assert(EntityDie);
+ addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(),
+ Module->getScope()->getDirectory());
+ addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
+ StringRef Name = Module->getName();
+ if (!Name.empty())
+ addString(*IMDie, dwarf::DW_AT_name, Name);
+
+ return IMDie;
+}
+
+void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
+ DIE *D = getDIE(SP);
+ if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
+ if (D)
+ // If this subprogram has an abstract definition, reference that
+ addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
+ } else {
+ if (!D && !includeMinimalInlineScopes())
+ // Lazily construct the subprogram if we didn't see either concrete or
+ // inlined versions during codegen (except in -gmlt, where we want
+ // to omit these entirely)
+ D = getOrCreateSubprogramDIE(SP);
+ if (D)
+ // And attach the attributes
+ applySubprogramAttributesToDefinition(SP, *D);
+ }
+}
+
+void DwarfCompileUnit::emitHeader(bool UseOffsets) {
+ // Don't bother labeling the .dwo unit, as its offset isn't used.
+ if (!Skeleton) {
+ LabelBegin = Asm->createTempSymbol("cu_begin");
+ Asm->OutStreamer->EmitLabel(LabelBegin);
+ }
+
+ DwarfUnit::emitHeader(UseOffsets);
+}
+
+/// addGlobalName - Add a new global name to the compile unit.
+void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+ const DIScope *Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ GlobalNames[FullName] = &Die;
+}
+
+/// Add a new global type to the unit.
+void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
+ GlobalTypes[FullName] = &Die;
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location) {
+ if (DV.hasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV.isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
+}
+
+/// Add an address attribute to a die based on the location provided.
+void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+
+ bool validReg;
+ if (Location.isReg())
+ validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ else
+ validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+ if (!validReg)
+ return;
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Start with the address based on the location provided, and generate the
+/// DWARF information necessary to find the actual variable given the extra
+/// address information encoded in the DbgVariable, starting from the starting
+/// location. Add the DWARF information to the die.
+void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ const DIExpression *Expr = DV.getSingleExpression();
+ bool ValidReg;
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (Location.getOffset()) {
+ ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(),
+ Location.getOffset());
+ if (ValidReg)
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
+ } else
+ ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg());
+
+ // Now attach the location information to the DIE.
+ if (ValidReg)
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Add a Dwarf loclistptr attribute data and value.
+void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
+ unsigned Index) {
+ dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4;
+ Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
+}
+
+void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
+ StringRef Name = Var.getName();
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, Var.getVariable());
+ addType(VariableDie, Var.getType());
+ if (Var.isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
+/// Add a Dwarf expression attribute data and value.
+void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
+ const MCExpr *Expr) {
+ Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr));
+}
+
+void DwarfCompileUnit::applySubprogramAttributesToDefinition(
+ const DISubprogram *SP, DIE &SPDie) {
+ auto *SPDecl = SP->getDeclaration();
+ auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
+ applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
+ addGlobalName(SP->getName(), SPDie, Context);
+}
+
+bool DwarfCompileUnit::isDwoUnit() const {
+ return DD->useSplitDwarf() && Skeleton;
+}
+
+bool DwarfCompileUnit::includeMinimalInlineScopes() const {
+ return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
+ (DD->useSplitDwarf() && !Skeleton);
+}
+} // end llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 000000000000..90f74a3686ea
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,265 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DwarfUnit.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Dwarf.h"
+
+namespace llvm {
+
+class StringRef;
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+class DwarfFile;
+class MCSymbol;
+class LexicalScope;
+
+class DwarfCompileUnit : public DwarfUnit {
+ /// A numeric ID unique among all CUs in the module
+ unsigned UniqueID;
+
+ /// Offset of the UnitDie from beginning of debug info section.
+ unsigned DebugInfoOffset = 0;
+
+ /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
+ /// the need to search for it in applyStmtList.
+ DIE::value_iterator StmtListValue;
+
+ /// Skeleton unit associated with this unit.
+ DwarfCompileUnit *Skeleton;
+
+ /// The start of the unit within its section.
+ MCSymbol *LabelBegin;
+
+ /// The start of the unit macro info within macro section.
+ MCSymbol *MacroLabelBegin;
+
+ typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
+ typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
+ ImportedEntityMap;
+
+ ImportedEntityMap ImportedEntities;
+
+ /// GlobalNames - A map of globally visible named entities for this unit.
+ StringMap<const DIE *> GlobalNames;
+
+ /// GlobalTypes - A map of globally visible types for this unit.
+ StringMap<const DIE *> GlobalTypes;
+
+ // List of range lists for a given compile unit, separate from the ranges for
+ // the CU itself.
+ SmallVector<RangeSpanList, 1> CURangeLists;
+
+ // List of ranges for a given compile unit.
+ SmallVector<RangeSpan, 2> CURanges;
+
+ // The base address of this unit, if any. Used for relative references in
+ // ranges/locs.
+ const MCSymbol *BaseAddress;
+
+ /// \brief Construct a DIE for the given DbgVariable without initializing the
+ /// DbgVariable's DIE reference.
+ DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
+
+ bool isDwoUnit() const override;
+
+ bool includeMinimalInlineScopes() const;
+
+public:
+ DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU);
+
+ unsigned getUniqueID() const { return UniqueID; }
+ unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+ void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+
+ DwarfCompileUnit *getSkeleton() const {
+ return Skeleton;
+ }
+
+ void initStmtList();
+
+ /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
+ void applyStmtList(DIE &D);
+
+ /// getOrCreateGlobalVariableDIE - get or create global variable DIE.
+ DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV);
+
+ /// addLabelAddress - Add a dwarf label attribute data and value using
+ /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addLocalLabelAddress - Add a dwarf label attribute data and value using
+ /// DW_FORM_addr only.
+ void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label);
+
+ /// addSectionDelta - Add a label delta attribute data and value.
+ DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo);
+
+ DwarfCompileUnit &getCU() override { return *this; }
+
+ unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+
+ void addImportedEntity(const DIImportedEntity* IE) {
+ DIScope *Scope = IE->getScope();
+ assert(Scope && "Invalid Scope encoding!");
+ if (!isa<DILocalScope>(Scope))
+ // No need to add imported entities that are not local declarations.
+ return;
+
+ auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
+ ImportedEntities[LocalScope].push_back(IE);
+ }
+
+ /// addRange - Add an address range to the list of ranges for this unit.
+ void addRange(RangeSpan Range);
+
+ void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End);
+
+ /// addSectionLabel - Add a Dwarf section label attribute data and value.
+ ///
+ DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Label,
+ const MCSymbol *Sec);
+
+ /// \brief Find DIE for the given subprogram and attach appropriate
+ /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+ /// variables in this scope then create and insert DIEs for these
+ /// variables.
+ DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
+
+ void constructScopeDIE(LexicalScope *Scope,
+ SmallVectorImpl<DIE *> &FinalChildren);
+
+ /// \brief A helper function to construct a RangeSpanList for a given
+ /// lexical scope.
+ void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range);
+
+ void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges);
+
+ void attachRangesOrLowHighPC(DIE &D,
+ const SmallVectorImpl<InsnRange> &Ranges);
+ /// \brief This scope represents inlined body of a function. Construct
+ /// DIE to represent this concrete inlined copy of the function.
+ DIE *constructInlinedScopeDIE(LexicalScope *Scope);
+
+ /// \brief Construct new DW_TAG_lexical_block for this scope and
+ /// attach DW_AT_low_pc/DW_AT_high_pc labels.
+ DIE *constructLexicalScopeDIE(LexicalScope *Scope);
+
+ /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+ DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
+
+ DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope,
+ DIE *&ObjectPointer);
+
+ /// A helper function to create children of a Scope DIE.
+ DIE *createScopeChildrenDIE(LexicalScope *Scope,
+ SmallVectorImpl<DIE *> &Children,
+ unsigned *ChildScopeCount = nullptr);
+
+ /// \brief Construct a DIE for this subprogram scope.
+ void constructSubprogramScopeDIE(LexicalScope *Scope);
+
+ DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
+
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+
+ /// \brief Construct import_module DIE.
+ DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
+
+ void finishSubprogramDefinition(const DISubprogram *SP);
+
+ /// Set the skeleton unit associated with this unit.
+ void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
+
+ const MCSymbol *getSectionSym() const {
+ assert(Section);
+ return Section->getBeginSymbol();
+ }
+
+ unsigned getLength() {
+ return sizeof(uint32_t) + // Length field
+ getHeaderSize() + UnitDie.getSize();
+ }
+
+ void emitHeader(bool UseOffsets) override;
+
+ MCSymbol *getLabelBegin() const {
+ assert(Section);
+ return LabelBegin;
+ }
+
+ MCSymbol *getMacroLabelBegin() const {
+ return MacroLabelBegin;
+ }
+
+ /// Add a new global name to the compile unit.
+ void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override;
+
+ /// Add a new global type to the compile unit.
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
+
+ const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
+ const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+ /// Add DW_AT_location attribute for a DbgVariable based on provided
+ /// MachineLocation.
+ void addVariableAddress(const DbgVariable &DV, DIE &Die,
+ MachineLocation Location);
+ /// Add an address attribute to a die based on the location provided.
+ void addAddress(DIE &Die, dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Start with the address based on the location provided, and generate the
+ /// DWARF information necessary to find the actual variable (navigating the
+ /// extra location information encoded in the type) based on the starting
+ /// location. Add the DWARF information to the die.
+ void addComplexAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Add a Dwarf loclistptr attribute data and value.
+ void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
+ void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
+
+ /// Add a Dwarf expression attribute data and value.
+ void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
+
+ void applySubprogramAttributesToDefinition(const DISubprogram *SP,
+ DIE &SPDie);
+
+ /// getRangeLists - Get the vector of range lists.
+ const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+ return (Skeleton ? Skeleton : this)->CURangeLists;
+ }
+
+ /// getRanges - Get the list of ranges for this unit.
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
+ SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); }
+
+ void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; }
+ const MCSymbol *getBaseAddress() const { return BaseAddress; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 000000000000..7fba7688f7fb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,1970 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfDebug.h"
+#include "ByteStreamer.h"
+#include "DIEHash.h"
+#include "DebugLocEntry.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfExpression.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+static cl::opt<bool>
+DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations(
+ "use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::init(false));
+
+static cl::opt<bool>
+GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
+ cl::desc("Generate GNU-style pubnames and pubtypes"),
+ cl::init(false));
+
+static cl::opt<bool> GenerateARangeSection("generate-arange-section",
+ cl::Hidden,
+ cl::desc("Generate dwarf aranges"),
+ cl::init(false));
+
+namespace {
+enum DefaultOnOff { Default, Enable, Disable };
+}
+
+static cl::opt<DefaultOnOff>
+DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+ cl::desc("Output prototype dwarf accelerator tables."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff>
+SplitDwarf("split-dwarf", cl::Hidden,
+ cl::desc("Output DWARF5 split debug info."),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ cl::init(Default));
+
+static cl::opt<DefaultOnOff>
+DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
+ cl::desc("Generate DWARF pubnames and pubtypes sections"),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"),
+ clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ cl::init(Default));
+
+enum LinkageNameOption {
+ DefaultLinkageNames,
+ AllLinkageNames,
+ AbstractLinkageNames
+};
+static cl::opt<LinkageNameOption>
+ DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+ cl::desc("Which DWARF linkage-name attributes to emit."),
+ cl::values(clEnumValN(DefaultLinkageNames, "Default",
+ "Default for platform"),
+ clEnumValN(AllLinkageNames, "All", "All"),
+ clEnumValN(AbstractLinkageNames, "Abstract",
+ "Abstract subprograms"),
+ clEnumValEnd),
+ cl::init(DefaultLinkageNames));
+
+static const char *const DWARFGroupName = "DWARF Emission";
+static const char *const DbgTimerName = "DWARF Debug Writer";
+
+void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+ BS.EmitInt8(
+ Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
+ : dwarf::OperationEncodingString(Op));
+}
+
+void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+ BS.EmitSLEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+ BS.EmitULEB128(Value, Twine(Value));
+}
+
+bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) {
+ // This information is not available while emitting .debug_loc entries.
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+
+bool DbgVariable::isBlockByrefVariable() const {
+ assert(Var && "Invalid complex DbgVariable!");
+ return Var->getType().resolve()->isBlockByrefStruct();
+}
+
+const DIType *DbgVariable::getType() const {
+ DIType *Ty = Var->getType().resolve();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Ty->isBlockByrefStruct()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
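+ // Illustrative sketch of such a compiler-generated struct (the exact
+ // layout varies and may include copy/dispose helpers):
+ //   struct __Block_byref_x_VarName {
+ //     void *__isa;
+ //     struct __Block_byref_x_VarName *__forwarding;
+ //     int __flags;
+ //     int __size;
+ //     SomeType VarName; // the field the loop below searches for by name
+ //   };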
+ DIType *subType = Ty;
+ uint16_t tag = Ty->getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type)
+ subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
+
+ auto Elements = cast<DICompositeType>(subType)->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ auto *DT = cast<DIDerivedType>(Elements[i]);
+ if (getName() == DT->getName())
+ return resolve(DT->getBaseType());
+ }
+ }
+ return Ty;
+}
+
+static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
+ DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
+ DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
+ DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+ : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
+ InfoHolder(A, "info_string", DIEValueAllocator),
+ SkeletonHolder(A, "skel_string", DIEValueAllocator),
+ IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
+ AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
+ dwarf::DW_FORM_data4)),
+ AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
+ dwarf::DW_FORM_data4)),
+ AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
+ dwarf::DW_FORM_data4)),
+ AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
+
+ CurFn = nullptr;
+ Triple TT(Asm->getTargetTriple());
+
+ // Make sure we know our "debugger tuning." The target option takes
+ // precedence; fall back to triple-based defaults.
+ if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
+ DebuggerTuning = Asm->TM.Options.DebuggerTuning;
+ else if (IsDarwin)
+ DebuggerTuning = DebuggerKind::LLDB;
+ else if (TT.isPS4CPU())
+ DebuggerTuning = DebuggerKind::SCE;
+ else
+ DebuggerTuning = DebuggerKind::GDB;
+
+ // Turn on accelerator tables for LLDB by default.
+ if (DwarfAccelTables == Default)
+ HasDwarfAccelTables = tuneForLLDB();
+ else
+ HasDwarfAccelTables = DwarfAccelTables == Enable;
+
+ HasAppleExtensionAttributes = tuneForLLDB();
+
+ // Handle split DWARF. Off by default for now.
+ if (SplitDwarf == Default)
+ HasSplitDwarf = false;
+ else
+ HasSplitDwarf = SplitDwarf == Enable;
+
+ // Pubnames/pubtypes on by default for GDB.
+ if (DwarfPubSections == Default)
+ HasDwarfPubSections = tuneForGDB();
+ else
+ HasDwarfPubSections = DwarfPubSections == Enable;
+
+ // SCE defaults to linkage names only for abstract subprograms.
+ if (DwarfLinkageNames == DefaultLinkageNames)
+ UseAllLinkageNames = !tuneForSCE();
+ else
+ UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
+
+ unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
+ DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
+ : MMI->getModule()->getDwarfVersion();
+ // Use dwarf 4 by default if nothing is requested.
+ DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
+
+ // Work around a GDB bug. GDB doesn't support the standard opcode;
+ // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
+ // is defined as of DWARF 3.
+ // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
+ UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
+
+ // GDB does not fully support the DWARF 4 representation for bitfields.
+ UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
+
+ Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
+}
+
+// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
+DwarfDebug::~DwarfDebug() { }
+
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name))
+ return false;
+
+ return Name.find(") ") != StringRef::npos;
+}
+
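+// For an ObjC method symbol such as "-[NSString(MyCategory) compare:]"
+// (illustrative example), the helpers below yield Class = "NSString",
+// Category = "NSString(MyCategory)", and the method name "compare:".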
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
+
+// Add the various names to the Dwarf accelerator table names.
+// TODO: Determine whether or not we should add names for programs
+// that do not have a DW_AT_name or DW_AT_linkage_name field - this
+// is only slightly different than the lookup of non-standard ObjC names.
+void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
+ if (!SP->isDefinition())
+ return;
+ addAccelName(SP->getName(), Die);
+
+ // If the linkage name is different than the name, go ahead and output
+ // that as well into the name table.
+ if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName())
+ addAccelName(SP->getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP->getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP->getName(), Class, Category);
+ addAccelObjC(Class, Die);
+ if (Category != "")
+ addAccelObjC(Category, Die);
+ // Also add the base method name to the name table.
+ addAccelName(getObjCMethodName(SP->getName()), Die);
+ }
+}
+
+/// Check whether we should create a DIE for the given Scope, return true
+/// if we don't create a DIE (the corresponding DIE is null).
+bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
+ if (Scope->isAbstractScope())
+ return false;
+
+ // We don't create a DIE if there is no Range.
+ const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
+ if (Ranges.empty())
+ return true;
+
+ if (Ranges.size() > 1)
+ return false;
+
+ // We don't create a DIE if we have a single Range and the end label
+ // is null.
+ return !getLabelAfterInsn(Ranges.front().second);
+}
+
+template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) {
+ F(CU);
+ if (auto *SkelCU = CU.getSkeleton())
+ F(*SkelCU);
+}
+
+void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(Scope->isAbstractScope());
+ assert(!Scope->getInlinedAt());
+
+ const MDNode *SP = Scope->getScopeNode();
+
+ ProcessedSPNodes.insert(SP);
+
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit());
+ forBothCUs(CU, [&](DwarfCompileUnit &CU) {
+ CU.constructAbstractSubprogramScopeDIE(Scope);
+ });
+}
+
+void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
+ if (!GenerateGnuPubSections)
+ return;
+
+ U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
+}
+
+// Create new DwarfCompileUnit for the given metadata node with tag
+// DW_TAG_compile_unit.
+DwarfCompileUnit &
+DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
+ StringRef FN = DIUnit->getFilename();
+ CompilationDir = DIUnit->getDirectory();
+
+ auto OwnedUnit = make_unique<DwarfCompileUnit>(
+ InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ DIE &Die = NewCU.getUnitDie();
+ InfoHolder.addUnit(std::move(OwnedUnit));
+ if (useSplitDwarf()) {
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit->getSplitDebugFilename());
+ }
+
+ // LTO with assembly output shares a single line table amongst multiple CUs.
+ // To avoid the compilation directory being ambiguous, let the line table
+ // explicitly describe the directory of all files, never relying on the
+ // compilation directory.
+ if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
+ Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
+ NewCU.getUniqueID(), CompilationDir);
+
+ NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
+ NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit->getSourceLanguage());
+ NewCU.addString(Die, dwarf::DW_AT_name, FN);
+
+ if (!useSplitDwarf()) {
+ NewCU.initStmtList();
+
+ // If we're using split dwarf the compilation dir is going to be in the
+ // skeleton CU and so we don't need to duplicate it here.
+ if (!CompilationDir.empty())
+ NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ addGnuPubAttributes(NewCU, Die);
+ }
+
+ if (useAppleExtensionAttributes()) {
+ if (DIUnit->isOptimized())
+ NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty())
+ NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+
+ if (unsigned RVer = DIUnit->getRuntimeVersion())
+ NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
+ }
+
+ if (useSplitDwarf())
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ else
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+
+ if (DIUnit->getDWOId()) {
+ // This CU is either a clang module DWO or a skeleton CU.
+ NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
+ DIUnit->getDWOId());
+ if (!DIUnit->getSplitDebugFilename().empty())
+ // This is a prefabricated skeleton CU.
+ NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ DIUnit->getSplitDebugFilename());
+ }
+
+ CUMap.insert(std::make_pair(DIUnit, &NewCU));
+ CUDieMap.insert(std::make_pair(&Die, &NewCU));
+ return NewCU;
+}
+
+void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const DIImportedEntity *N) {
+ if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
+ D->addChild(TheCU.constructImportedEntityDIE(N));
+}
+
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
+void DwarfDebug::beginModule() {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ if (DisableDebugInfoPrinting)
+ return;
+
+ const Module *M = MMI->getModule();
+
+ unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
+ M->debug_compile_units_end());
+ // Tell MMI whether we have debug info.
+ MMI->setDebugInfoAvailability(NumDebugCUs > 0);
+ SingleCU = NumDebugCUs == 1;
+
+ for (DICompileUnit *CUNode : M->debug_compile_units()) {
+ DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
+ for (auto *IE : CUNode->getImportedEntities())
+ CU.addImportedEntity(IE);
+ for (auto *GV : CUNode->getGlobalVariables())
+ CU.getOrCreateGlobalVariableDIE(GV);
+ for (auto *Ty : CUNode->getEnumTypes()) {
+ // The enum types array by design contains pointers to
+ // MDNodes rather than DIRefs. Unique them here.
+ CU.getOrCreateTypeDIE(cast<DIType>(Ty));
+ }
+ for (auto *Ty : CUNode->getRetainedTypes()) {
+ // The retained types array by design contains pointers to
+ // MDNodes rather than DIRefs. Unique them here.
+ if (DIType *RT = dyn_cast<DIType>(Ty))
+ if (!RT->isExternalTypeRef())
+ // There is no point in force-emitting a forward declaration.
+ CU.getOrCreateTypeDIE(RT);
+ }
+ // Emit imported_modules last so that the relevant context is already
+ // available.
+ for (auto *IE : CUNode->getImportedEntities())
+ constructAndAddImportedEntityDIE(CU, IE);
+ }
+}
+
+void DwarfDebug::finishVariableDefinitions() {
+ for (const auto &Var : ConcreteVariables) {
+ DIE *VariableDie = Var->getDIE();
+ assert(VariableDie);
+ // FIXME: Consider the time-space tradeoff of just storing the unit pointer
+ // in the ConcreteVariables list, rather than looking it up again here.
+ // DIE::getUnit isn't simple - it walks parent pointers, etc.
+ DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+ assert(Unit);
+ DbgVariable *AbsVar = getExistingAbstractVariable(
+ InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
+ if (AbsVar && AbsVar->getDIE()) {
+ Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+ *AbsVar->getDIE());
+ } else
+ Unit->applyVariableAttributes(*Var, *VariableDie);
+ }
+}
+
+void DwarfDebug::finishSubprogramDefinitions() {
+ for (auto &F : MMI->getModule()->functions())
+ if (auto *SP = F.getSubprogram())
+ if (ProcessedSPNodes.count(SP) &&
+ SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
+ forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
+ CU.finishSubprogramDefinition(SP);
+ });
+}
+
+void DwarfDebug::finalizeModuleInfo() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ finishSubprogramDefinitions();
+
+ finishVariableDefinitions();
+
+ // Handle anything that needs to be done on a per-unit basis after
+ // all other generation.
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
+ // Emit DW_AT_containing_type attribute to connect types with their
+ // vtable holding type.
+ TheCU.constructContainingTypeDIEs();
+
+ // Add CU specific attributes if we need to add any.
+ // If we're splitting the dwarf out now that we've got the entire
+ // CU then add the dwo id to it.
+ auto *SkCU = TheCU.getSkeleton();
+ if (useSplitDwarf()) {
+ // Emit a unique identifier for this CU.
+ uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie());
+ TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+ SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, ID);
+
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty()) {
+ const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
+ Sym, Sym);
+ }
+ if (!SkCU->getRangeLists().empty()) {
+ const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
+ Sym, Sym);
+ }
+ }
+
+ // If we have code split among multiple sections or non-contiguous
+ // ranges of code then emit a DW_AT_ranges attribute on the unit that will
+ // remain in the .o file, otherwise add a DW_AT_low_pc.
+ // FIXME: We should use ranges to allow reordering of code, a la
+ // .subsections_via_symbols in Mach-O. This would mean turning on
+ // ranges for all subprogram DIEs for Mach-O.
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ if (unsigned NumRanges = TheCU.getRanges().size()) {
+ if (NumRanges > 1)
+ // A DW_AT_low_pc attribute may also be specified in combination with
+ // DW_AT_ranges to specify the default base address for use in
+ // location lists (see Section 2.6.2) and range lists (see Section
+ // 2.17.3).
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ else
+ U.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
+ }
+
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ // If compile Unit has macros, emit "DW_AT_macro_info" attribute.
+ if (CUNode->getMacros())
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ }
+
+ // Compute DIE offsets and sizes.
+ InfoHolder.computeSizeAndOffsets();
+ if (useSplitDwarf())
+ SkeletonHolder.computeSizeAndOffsets();
+}
+
+// Emit all Dwarf sections that should come after the content.
+void DwarfDebug::endModule() {
+ assert(CurFn == nullptr);
+ assert(CurMI == nullptr);
+
+ // If we aren't actually generating debug info (see beginModule, which is
+ // conditionalized on !DisableDebugInfoPrinting and the presence of the
+ // llvm.dbg.cu metadata node), there is nothing to emit.
+ if (!MMI->hasDebugInfo())
+ return;
+
+ // Finalize the debug info for the module.
+ finalizeModuleInfo();
+
+ emitDebugStr();
+
+ if (useSplitDwarf())
+ emitDebugLocDWO();
+ else
+ // Emit info into a debug loc section.
+ emitDebugLoc();
+
+ // Emit the corresponding abbreviations into an abbrev section.
+ emitAbbreviations();
+
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
+ // Emit info into a debug aranges section.
+ if (GenerateARangeSection)
+ emitDebugARanges();
+
+ // Emit info into a debug ranges section.
+ emitDebugRanges();
+
+ // Emit info into a debug macinfo section.
+ emitDebugMacinfo();
+
+ if (useSplitDwarf()) {
+ emitDebugStrDWO();
+ emitDebugInfoDWO();
+ emitDebugAbbrevDWO();
+ emitDebugLineDWO();
+ // Emit DWO addresses.
+ AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
+ }
+
+ // Emit info into the dwarf accelerator table sections.
+ if (useDwarfAccelTables()) {
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ }
+
+ // Emit the pubnames and pubtypes sections if requested.
+ if (HasDwarfPubSections) {
+ emitDebugPubNames(GenerateGnuPubSections);
+ emitDebugPubTypes(GenerateGnuPubSections);
+ }
+
+ // clean up.
+ AbstractVariables.clear();
+}
+
+// Find abstract variable, if any, associated with Var.
+DbgVariable *
+DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
+ const DILocalVariable *&Cleansed) {
+ // More than one inlined variable corresponds to one abstract variable.
+ Cleansed = IV.first;
+ auto I = AbstractVariables.find(Cleansed);
+ if (I != AbstractVariables.end())
+ return I->second.get();
+ return nullptr;
+}
+
+DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
+ const DILocalVariable *Cleansed;
+ return getExistingAbstractVariable(IV, Cleansed);
+}
+
+void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
+ LexicalScope *Scope) {
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
+ InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
+ AbstractVariables[Var] = std::move(AbsDbgVariable);
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
+ const MDNode *ScopeNode) {
+ const DILocalVariable *Cleansed = nullptr;
+ if (getExistingAbstractVariable(IV, Cleansed))
+ return;
+
+ createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ cast<DILocalScope>(ScopeNode)));
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
+ InlinedVariable IV, const MDNode *ScopeNode) {
+ const DILocalVariable *Cleansed = nullptr;
+ if (getExistingAbstractVariable(IV, Cleansed))
+ return;
+
+ if (LexicalScope *Scope =
+ LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
+ createAbstractVariable(Cleansed, Scope);
+}
+
+// Collect variable information from side table maintained by MMI.
+void DwarfDebug::collectVariableInfoFromMMITable(
+ DenseSet<InlinedVariable> &Processed) {
+ for (const auto &VI : MMI->getVariableDbgInfo()) {
+ if (!VI.Var)
+ continue;
+ assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
+ "Expected inlined-at fields to agree");
+
+ InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt());
+ Processed.insert(Var);
+ LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
+
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
+ auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
+ RegVar->initializeMMI(VI.Expr, VI.Slot);
+ if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
+ ConcreteVariables.push_back(std::move(RegVar));
+ }
+}
+
+// Get .debug_loc entry for the instruction range starting at MI.
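+// A DBG_VALUE here is expected to have exactly four operands: the location
+// (register, immediate, FP or other constant), a second operand that is an
+// immediate offset only for register-indirect locations, the variable, and
+// the expression; see the assert and the operand checks below.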
+static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
+ const DIExpression *Expr = MI->getDebugExpression();
+
+ assert(MI->getNumOperands() == 4);
+ if (MI->getOperand(0).isReg()) {
+ MachineLocation MLoc;
+ // If the second operand is an immediate, this is a
+ // register-indirect address.
+ if (!MI->getOperand(1).isImm())
+ MLoc.set(MI->getOperand(0).getReg());
+ else
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DebugLocEntry::Value(Expr, MLoc);
+ }
+ if (MI->getOperand(0).isImm())
+ return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
+
+ llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
+}
+
+/// \brief If this and Next are describing different pieces of the same
+/// variable, merge them by appending Next's values to the current
+/// list of values.
+/// Return true if the merge was successful.
+bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
+ if (Begin == Next.Begin) {
+ auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
+ auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
+ if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece())
+ return false;
+
+ // We can only merge entries if none of the pieces overlap any others.
+ // In doing so, we can take advantage of the fact that both lists are
+ // sorted.
+ for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
+ for (; j < Next.Values.size(); ++j) {
+ int res = DebugHandlerBase::pieceCmp(
+ cast<DIExpression>(Values[i].Expression),
+ cast<DIExpression>(Next.Values[j].Expression));
+ if (res == 0) // The two expressions overlap, we can't merge.
+ return false;
+ // Values[i] is entirely before Next.Values[j],
+ // so go back to the next entry of Values.
+ else if (res == -1)
+ break;
+ // Next.Values[j] is entirely before Values[i], so go on to the
+ // next entry of Next.Values.
+ }
+ }
+
+ addValues(Next.Values);
+ End = Next.End;
+ return true;
+ }
+ return false;
+}
+
+/// Build the location list for all DBG_VALUEs in the function that
+/// describe the same variable. If the ranges of several independent
+/// pieces of the same variable overlap partially, split them up and
+/// combine the ranges. The resulting DebugLocEntries will have
+/// strictly monotonically increasing begin addresses and will never
+/// overlap.
+//
+// Input:
+//
+// Ranges History [var, loc, piece ofs size]
+// 0 | [x, (reg0, piece 0, 32)]
+// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// 2 | | ...
+// 3 | [clobber reg0]
+// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+// x.
+//
+// Output:
+//
+// [0-1] [x, (reg0, piece 0, 32)]
+// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
+// [3-4] [x, (reg1, piece 32, 32)]
+// [4- ] [x, (mem, piece 0, 64)]
+void
+DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges) {
+ SmallVector<DebugLocEntry::Value, 4> OpenRanges;
+
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const MachineInstr *Begin = I->first;
+ const MachineInstr *End = I->second;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if a variable is inaccessible in this range.
+ if (Begin->getNumOperands() > 1 &&
+ Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
+ OpenRanges.clear();
+ continue;
+ }
+
+ // If this piece overlaps with any open ranges, truncate them.
+ const DIExpression *DIExpr = Begin->getDebugExpression();
+ auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+ [&](DebugLocEntry::Value R) {
+ return piecesOverlap(DIExpr, R.getExpression());
+ });
+ OpenRanges.erase(Last, OpenRanges.end());
+
+ const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+ assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
+ const MCSymbol *EndLabel;
+ if (End != nullptr)
+ EndLabel = getLabelAfterInsn(End);
+ else if (std::next(I) == Ranges.end())
+ EndLabel = Asm->getFunctionEnd();
+ else
+ EndLabel = getLabelBeforeInsn(std::next(I)->first);
+ assert(EndLabel && "Forgot label after instruction ending a range!");
+
+ DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+
+ auto Value = getDebugLocValue(Begin);
+ DebugLocEntry Loc(StartLabel, EndLabel, Value);
+ bool couldMerge = false;
+
+ // If this is a piece, it may belong to the current DebugLocEntry.
+ if (DIExpr->isBitPiece()) {
+ // Add this value to the list of open ranges.
+ OpenRanges.push_back(Value);
+
+ // Attempt to add the piece to the last entry.
+ if (!DebugLoc.empty())
+ if (DebugLoc.back().MergeValues(Loc))
+ couldMerge = true;
+ }
+
+ if (!couldMerge) {
+ // Need to add a new DebugLocEntry. Add all values from still
+ // valid non-overlapping pieces.
+ if (OpenRanges.size())
+ Loc.addValues(OpenRanges);
+
+ DebugLoc.push_back(std::move(Loc));
+ }
+
+ // Attempt to coalesce the ranges of two otherwise identical
+ // DebugLocEntries.
+ auto CurEntry = DebugLoc.rbegin();
+ DEBUG({
+ dbgs() << CurEntry->getValues().size() << " Values:\n";
+ for (auto &Value : CurEntry->getValues())
+ Value.dump();
+ dbgs() << "-----\n";
+ });
+
+ auto PrevEntry = std::next(CurEntry);
+ if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
+ DebugLoc.pop_back();
+ }
+}
+
+DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope,
+ InlinedVariable IV) {
+ ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
+ InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
+ return ConcreteVariables.back().get();
+}
+
+// Determine whether this DBG_VALUE is valid at the beginning of the function.
+static bool validAtEntry(const MachineInstr *MInsn) {
+ auto MBB = MInsn->getParent();
+ // Is it in the entry basic block?
+ if (!MBB->pred_empty())
+ return false;
+ for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I)
+ if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup)))
+ return false;
+ return true;
+}
+
+// Find variables for each lexical scope.
+void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
+ const DISubprogram *SP,
+ DenseSet<InlinedVariable> &Processed) {
+ // Grab the variable info that was squirreled away in the MMI side-table.
+ collectVariableInfoFromMMITable(Processed);
+
+ for (const auto &I : DbgValues) {
+ InlinedVariable IV = I.first;
+ if (Processed.count(IV))
+ continue;
+
+ // Instruction ranges, specifying where IV is accessible.
+ const auto &Ranges = I.second;
+ if (Ranges.empty())
+ continue;
+
+ LexicalScope *Scope = nullptr;
+ if (const DILocation *IA = IV.second)
+ Scope = LScopes.findInlinedScope(IV.first->getScope(), IA);
+ else
+ Scope = LScopes.findLexicalScope(IV.first->getScope());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ Processed.insert(IV);
+ DbgVariable *RegVar = createConcreteVariable(*Scope, IV);
+
+ const MachineInstr *MInsn = Ranges.front().first;
+ assert(MInsn->isDebugValue() && "History must begin with debug value");
+
+ // Check if there is a single DBG_VALUE, valid throughout the function.
+ // A single constant is also considered valid for the entire function.
+ if (Ranges.size() == 1 &&
+ (MInsn->getOperand(0).isImm() ||
+ (validAtEntry(MInsn) && Ranges.front().second == nullptr))) {
+ RegVar->initializeDbgValue(MInsn);
+ continue;
+ }
+
+ // Handle multiple DBG_VALUE instructions describing one variable.
+ DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn);
+
+ // Build the location list for this variable.
+ SmallVector<DebugLocEntry, 8> Entries;
+ buildLocationList(Entries, Ranges);
+
+ // If the variable has a DIBasicType, extract it. Basic types cannot have
+ // unique identifiers, so don't bother resolving the type with the
+ // identifier map.
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(IV.first->getType()));
+
+ // Finalize the entry by lowering it into a DWARF bytestream.
+ for (auto &Entry : Entries)
+ Entry.finalize(*Asm, List, BT);
+ }
+
+ // Collect info for variables that were optimized out.
+ for (const DILocalVariable *DV : SP->getVariables()) {
+ if (Processed.insert(InlinedVariable(DV, nullptr)).second)
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
+ createConcreteVariable(*Scope, InlinedVariable(DV, nullptr));
+ }
+}
+
+// Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+ DebugHandlerBase::beginInstruction(MI);
+ assert(CurMI);
+
+ // Check if source location changes, but ignore DBG_VALUE locations.
+ if (!MI->isDebugValue()) {
+ const DebugLoc &DL = MI->getDebugLoc();
+ if (DL != PrevInstLoc) {
+ if (DL) {
+ unsigned Flags = 0;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ Flags |= DWARF2_FLAG_IS_STMT;
+ }
+ if (DL.getLine() !=
+ Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+ } else if (UnknownLocations) {
+ PrevInstLoc = DL;
+ recordSourceLine(0, 0, nullptr, 0);
+ }
+ }
+ }
+}
+
+static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ for (const auto &MBB : *MF)
+ for (const auto &MI : MBB)
+ if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ MI.getDebugLoc())
+ return MI.getDebugLoc();
+ return DebugLoc();
+}
+
+// Gather pre-function debug information. Assumed to be called immediately
+// after the function entry point has been emitted.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+ CurFn = MF;
+
+ // If there's no debug info for the function we're not going to do anything.
+ if (!MMI->hasDebugInfo())
+ return;
+
+ auto DI = MF->getFunction()->getSubprogram();
+ if (!DI)
+ return;
+
+ // Grab the lexical scopes for the function, if we don't have any of those
+ // then we're not going to be able to do anything.
+ DebugHandlerBase::beginFunction(MF);
+ if (LScopes.empty())
+ return;
+
+ // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
+ // belongs to so that we add to the correct per-cu line table in the
+ // non-asm case.
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ // FnScope->getScopeNode() and DI should represent the same function,
+ // though they may not be the same MDNode when inline functions are merged
+ // in LTO and the debug info metadata still differs: either two versions of
+ // a linkonce_odr function were written/copied into two separate files, or
+ // the metadata isn't structurally identical (e.g. file path/name info from
+ // clang, which includes the directory of the cpp file being built even when
+ // the file name is absolute, such as an <> lookup header).
+ auto *SP = cast<DISubprogram>(FnScope->getScopeNode());
+ DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit());
+ if (!TheCU) {
+ assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug &&
+ "DICompileUnit missing from llvm.dbg.cu?");
+ return;
+ }
+ if (Asm->OutStreamer->hasRawTextSupport())
+ // Use a single line table if we are generating assembly.
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+ else
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+
+ // Record beginning of function.
+ PrologEndLoc = findPrologueEndLoc(MF);
+ if (DILocation *L = PrologEndLoc) {
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ auto *SP = L->getInlinedAtScope()->getSubprogram();
+ recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
+ }
+}
+
+// Gather and emit post-function debug information.
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+ assert(CurFn == MF &&
+ "endFunction should be called with the same function as beginFunction");
+
+ const DISubprogram *SP = MF->getFunction()->getSubprogram();
+ if (!MMI->hasDebugInfo() || LScopes.empty() || !SP ||
+ SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
+ // If we don't have a lexical scope for this function then there will
+ // be a hole in the range information. Keep note of this by setting the
+ // previously used section to nullptr.
+ PrevCU = nullptr;
+ CurFn = nullptr;
+ DebugHandlerBase::endFunction(MF);
+ // Mark functions that have a valid DISubprogram but no debug info on any
+ // instruction as processed.
+ if (SP)
+ ProcessedSPNodes.insert(SP);
+ return;
+ }
+
+ // Set DwarfDwarfCompileUnitID in MCContext to default value.
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ SP = cast<DISubprogram>(FnScope->getScopeNode());
+ DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
+
+ DenseSet<InlinedVariable> ProcessedVars;
+ collectVariableInfo(TheCU, SP, ProcessedVars);
+
+ // Add the range of this function to the list of ranges for the CU.
+ TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
+
+ // Under -gmlt, skip building the subprogram if there are no inlined
+ // subroutines inside it.
+ if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
+ LScopes.getAbstractScopesList().empty() && !IsDarwin) {
+ assert(InfoHolder.getScopeVariables().empty());
+ assert(DbgValues.empty());
+ // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
+ // by a -gmlt CU. Add a test and remove this assertion.
+ assert(AbstractVariables.empty());
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ DebugHandlerBase::endFunction(MF);
+ return;
+ }
+
+#ifndef NDEBUG
+ size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
+#endif
+ // Construct abstract scopes.
+ for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
+ auto *SP = cast<DISubprogram>(AScope->getScopeNode());
+ // Collect info for variables that were optimized out.
+ for (const DILocalVariable *DV : SP->getVariables()) {
+ if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
+ continue;
+ ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr),
+ DV->getScope());
+ assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
+ && "ensureAbstractVariableIsCreated inserted abstract scopes");
+ }
+ constructAbstractSubprogramScopeDIE(AScope);
+ }
+
+ TheCU.constructSubprogramScopeDIE(FnScope);
+ if (auto *SkelCU = TheCU.getSkeleton())
+ if (!LScopes.getAbstractScopesList().empty())
+ SkelCU->constructSubprogramScopeDIE(FnScope);
+
+ // Clear debug info
+ // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
+ // DbgVariables except those that are also in AbstractVariables (since they
+ // can be used cross-function)
+ InfoHolder.getScopeVariables().clear();
+ PrevLabel = nullptr;
+ CurFn = nullptr;
+ DebugHandlerBase::endFunction(MF);
+}
+
+// Register a source line with debug info. Returns the unique label that was
+// emitted and which provides correspondence to the source line list.
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
+ unsigned Flags) {
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ unsigned Discriminator = 0;
+ if (auto *Scope = cast_or_null<DIScope>(S)) {
+ Fn = Scope->getFilename();
+ Dir = Scope->getDirectory();
+ if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
+ Discriminator = LBF->getDiscriminator();
+
+ unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
+ Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
+ .getOrCreateSourceID(Fn, Dir);
+ }
+ Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
+ Discriminator, Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitUnits(/* UseOffsets */ false);
+}
+
+// Emit the abbreviation section.
+void DwarfDebug::emitAbbreviations() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+ Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+}
+
+void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section,
+ StringRef TableName) {
+ Accel.FinalizeTable(Asm, TableName);
+ Asm->OutStreamer->SwitchSection(Section);
+
+ // Emit the full data.
+ Accel.emit(Asm, Section->getBeginSymbol(), this);
+}
+
+// Emit visible names into a hashed accelerator table section.
+void DwarfDebug::emitAccelNames() {
+ emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
+ "Names");
+}
+
+// Emit objective C classes and categories into a hashed accelerator table
+// section.
+void DwarfDebug::emitAccelObjC() {
+ emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
+ "ObjC");
+}
+
+// Emit namespace dies into a hashed accelerator table.
+void DwarfDebug::emitAccelNamespaces() {
+ emitAccel(AccelNamespace,
+ Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
+ "namespac");
+}
+
+// Emit type dies into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
+ "types");
+}
+
+// Public name handling.
+// The format for the various pubnames:
+//
+// dwarf pubnames - offset/name pairs where the offset is the offset into the CU
+// for the DIE that is named.
+//
+// gnu pubnames - offset/index value/name tuples where the offset is the offset
+// into the CU and the index value is computed according to the type of value
+// for the DIE that is named.
+//
+// For type units the offset is the offset of the skeleton DIE. For split dwarf
+// it's the offset within the debug_info/debug_types dwo section, however, the
+// reference in the pubname header doesn't change.
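+//
+// As an illustrative sketch, each gnu pubnames entry emitted below is a DIE
+// offset, a single descriptor byte packing the kind and linkage (see
+// computeIndexValue), and the NUL-terminated name.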
+
+/// computeIndexValue - Compute the gdb index value for the DIE and CU.
+static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
+ const DIE *Die) {
+ dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
+
+ // We could have a specification DIE that has most of our knowledge;
+ // look for that now.
+ if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) {
+ DIE &SpecDIE = SpecVal.getDIEEntry().getEntry();
+ if (SpecDIE.findAttribute(dwarf::DW_AT_external))
+ Linkage = dwarf::GIEL_EXTERNAL;
+ } else if (Die->findAttribute(dwarf::DW_AT_external))
+ Linkage = dwarf::GIEL_EXTERNAL;
+
+ switch (Die->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_enumeration_type:
+ return dwarf::PubIndexEntryDescriptor(
+ dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
+ ? dwarf::GIEL_STATIC
+ : dwarf::GIEL_EXTERNAL);
+ case dwarf::DW_TAG_typedef:
+ case dwarf::DW_TAG_base_type:
+ case dwarf::DW_TAG_subrange_type:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC);
+ case dwarf::DW_TAG_namespace:
+ return dwarf::GIEK_TYPE;
+ case dwarf::DW_TAG_subprogram:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage);
+ case dwarf::DW_TAG_variable:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage);
+ case dwarf::DW_TAG_enumerator:
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE,
+ dwarf::GIEL_STATIC);
+ default:
+ return dwarf::GIEK_NONE;
+ }
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
+ MCSection *PSec = GnuStyle
+ ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
+ : Asm->getObjFileLowering().getDwarfPubNamesSection();
+
+ emitDebugPubSection(GnuStyle, PSec, "Names",
+ &DwarfCompileUnit::getGlobalNames);
+}
+
+void DwarfDebug::emitDebugPubSection(
+ bool GnuStyle, MCSection *PSec, StringRef Name,
+ const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) {
+ for (const auto &NU : CUMap) {
+ DwarfCompileUnit *TheU = NU.second;
+
+ const auto &Globals = (TheU->*Accessor)();
+
+ if (Globals.empty())
+ continue;
+
+ if (auto *Skeleton = TheU->getSkeleton())
+ TheU = Skeleton;
+
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer->SwitchSection(PSec);
+
+ // Emit the header.
+ Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
+ MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
+ MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ Asm->OutStreamer->EmitLabel(BeginLabel);
+
+ Asm->OutStreamer->AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
+
+ Asm->OutStreamer->AddComment("Offset of Compilation Unit Info");
+ Asm->emitDwarfSymbolReference(TheU->getLabelBegin());
+
+ Asm->OutStreamer->AddComment("Compilation Unit Length");
+ Asm->EmitInt32(TheU->getLength());
+
+ // Emit the pubnames for this compilation unit.
+ for (const auto &GI : Globals) {
+ const char *Name = GI.getKeyData();
+ const DIE *Entity = GI.second;
+
+ Asm->OutStreamer->AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (GnuStyle) {
+ dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
+ Asm->OutStreamer->AddComment(
+ Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
+ dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
+ Asm->EmitInt8(Desc.toBits());
+ }
+
+ Asm->OutStreamer->AddComment("External Name");
+ Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
+ }
+
+ Asm->OutStreamer->AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer->EmitLabel(EndLabel);
+ }
+}
+
+void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
+ MCSection *PSec = GnuStyle
+ ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
+ : Asm->getObjFileLowering().getDwarfPubTypesSection();
+
+ emitDebugPubSection(GnuStyle, PSec, "Types",
+ &DwarfCompileUnit::getGlobalTypes);
+}
+
+/// Emit null-terminated strings into a debug str section.
+void DwarfDebug::emitDebugStr() {
+ DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+ Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+}
+
+void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocStream::Entry &Entry) {
+ auto &&Comments = DebugLocs.getComments(Entry);
+ auto Comment = Comments.begin();
+ auto End = Comments.end();
+ for (uint8_t Byte : DebugLocs.getBytes(Entry))
+ Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
+}
+
+static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value,
+ unsigned PieceOffsetInBits) {
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
+ // Regular entry.
+ if (Value.isInt()) {
+ if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
+ BT->getEncoding() == dwarf::DW_ATE_signed_char))
+ DwarfExpr.AddSignedConstant(Value.getInt());
+ else
+ DwarfExpr.AddUnsignedConstant(Value.getInt());
+ } else if (Value.isLocation()) {
+ MachineLocation Loc = Value.getLoc();
+ const DIExpression *Expr = Value.getExpression();
+ if (!Expr || !Expr->getNumElements())
+ // Regular entry.
+ AP.EmitDwarfRegOp(Streamer, Loc);
+ else {
+ // Complex address entry.
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
+ if (Loc.getOffset()) {
+ DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
+ DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(),
+ PieceOffsetInBits);
+ } else
+ DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(),
+ PieceOffsetInBits);
+ }
+ } else if (Value.isConstantFP()) {
+ APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
+ DwarfExpr.AddUnsignedConstant(RawBytes);
+ }
+}
+
+void DebugLocEntry::finalize(const AsmPrinter &AP,
+ DebugLocStream::ListBuilder &List,
+ const DIBasicType *BT) {
+ DebugLocStream::EntryBuilder Entry(List, Begin, End);
+ BufferByteStreamer Streamer = Entry.getStreamer();
+ const DebugLocEntry::Value &Value = Values[0];
+ if (Value.isBitPiece()) {
+ // Emit all pieces that belong to the same variable and range.
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
+ return P.isBitPiece();
+ }) && "all values are expected to be pieces");
+ assert(std::is_sorted(Values.begin(), Values.end()) &&
+ "pieces are expected to be sorted");
+
+ unsigned Offset = 0;
+ for (auto Piece : Values) {
+ const DIExpression *Expr = Piece.getExpression();
+ unsigned PieceOffset = Expr->getBitPieceOffset();
+ unsigned PieceSize = Expr->getBitPieceSize();
+ assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
+ if (Offset < PieceOffset) {
+ // The DWARF spec seriously mandates pieces with no locations for gaps.
+ DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
+ Expr.AddOpPiece(PieceOffset-Offset, 0);
+ Offset += PieceOffset-Offset;
+ }
+ Offset += PieceSize;
+
+ emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset);
+ }
+ } else {
+ assert(Values.size() == 1 && "only pieces may have >1 value");
+ emitDebugLocValue(AP, BT, Streamer, Value, 0);
+ }
+}
+
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
+ // Emit the size.
+ Asm->OutStreamer->AddComment("Loc expr size");
+ Asm->EmitInt16(DebugLocs.getBytes(Entry).size());
+
+ // Emit the entry.
+ APByteStreamer Streamer(*Asm);
+ emitDebugLocEntry(Streamer, Entry);
+}
+
+// Emit locations into the debug loc section.
+void DwarfDebug::emitDebugLoc() {
+ // Start the dwarf loc section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
+ for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->EmitLabel(List.Label);
+ const DwarfCompileUnit *CU = List.CU;
+ for (const auto &Entry : DebugLocs.getEntries(List)) {
+ // Set up the range. This range is relative to the entry point of the
+ // compile unit. This is a hard coded 0 for low_pc when we're emitting
+ // ranges, or the DW_AT_low_pc on the compile unit otherwise.
+ if (auto *Base = CU->getBaseAddress()) {
+ Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
+ Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ } else {
+ Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
+ Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
+ }
+
+ emitDebugLocEntryLocation(Entry);
+ }
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+}
+
+void DwarfDebug::emitDebugLocDWO() {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocDWOSection());
+ for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->EmitLabel(List.Label);
+ for (const auto &Entry : DebugLocs.getEntries(List)) {
+ // Just always use start_length for now - at least that's one address
+ // rather than two. We could get fancier and try to, say, reuse an
+ // address we know we've emitted elsewhere (the start of the function?
+ // The start of the CU or CU subrange that encloses this range?)
+ Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
+ unsigned idx = AddrPool.getIndex(Entry.BeginSym);
+ Asm->EmitULEB128(idx);
+ Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
+
+ emitDebugLocEntryLocation(Entry);
+ }
+ Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry);
+ }
+}
+
+struct ArangeSpan {
+ const MCSymbol *Start, *End;
+};
+
+// Emit a debug aranges section, containing a CU lookup for any
+// address we can tie back to a CU.
+void DwarfDebug::emitDebugARanges() {
+ // Provides a unique id per text section.
+ MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
+
+ // Filter labels by section.
+ for (const SymbolCU &SCU : ArangeLabels) {
+ if (SCU.Sym->isInSection()) {
+ // Make a note of this symbol and its section.
+ MCSection *Section = &SCU.Sym->getSection();
+ if (!Section->getKind().isMetadata())
+ SectionMap[Section].push_back(SCU);
+ } else {
+ // Some symbols (e.g. common/bss on mach-o) can have no section but still
+ // appear in the output. This sucks as we rely on sections to build
+ // arange spans. We can do it without, but it's icky.
+ SectionMap[nullptr].push_back(SCU);
+ }
+ }
+
+ DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
+
+ for (auto &I : SectionMap) {
+ MCSection *Section = I.first;
+ SmallVector<SymbolCU, 8> &List = I.second;
+ if (List.size() < 1)
+ continue;
+
+ // If we have no section (e.g. common), just write out
+ // individual spans for each symbol.
+ if (!Section) {
+ for (const SymbolCU &Cur : List) {
+ ArangeSpan Span;
+ Span.Start = Cur.Sym;
+ Span.End = nullptr;
+ assert(Cur.CU);
+ Spans[Cur.CU].push_back(Span);
+ }
+ continue;
+ }
+
+ // Sort the symbols by offset within the section.
+ std::sort(
+ List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
+ unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+
+ // Symbols with no order assigned should be placed at the end.
+ // (e.g. section end labels)
+ if (IA == 0)
+ return false;
+ if (IB == 0)
+ return true;
+ return IA < IB;
+ });
+
+ // Insert a final terminator.
+ List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
+
+ // Build spans between each label.
+ const MCSymbol *StartSym = List[0].Sym;
+ for (size_t n = 1, e = List.size(); n < e; n++) {
+ const SymbolCU &Prev = List[n - 1];
+ const SymbolCU &Cur = List[n];
+
+ // Try to build the longest span we can within the same CU.
+ if (Cur.CU != Prev.CU) {
+ ArangeSpan Span;
+ Span.Start = StartSym;
+ Span.End = Cur.Sym;
+ assert(Prev.CU);
+ Spans[Prev.CU].push_back(Span);
+ StartSym = Cur.Sym;
+ }
+ }
+ }
+
+ // Start the dwarf aranges section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfARangesSection());
+
+ unsigned PtrSize = Asm->getDataLayout().getPointerSize();
+
+ // Build a list of CUs used.
+ std::vector<DwarfCompileUnit *> CUs;
+ for (const auto &it : Spans) {
+ DwarfCompileUnit *CU = it.first;
+ CUs.push_back(CU);
+ }
+
+ // Sort the CU list (again, to ensure consistent output order).
+ std::sort(CUs.begin(), CUs.end(),
+ [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) {
+ return A->getUniqueID() < B->getUniqueID();
+ });
+
+ // Emit an arange table for each CU we used.
+ for (DwarfCompileUnit *CU : CUs) {
+ std::vector<ArangeSpan> &List = Spans[CU];
+
+ // Describe the skeleton CU's offset and length, not the dwo file's.
+ if (auto *Skel = CU->getSkeleton())
+ CU = Skel;
+
+ // Emit size of content not including length itself.
+ unsigned ContentSize =
+ sizeof(int16_t) + // DWARF ARange version number
+ sizeof(int32_t) + // Offset of CU in the .debug_info section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int8_t); // Segment Size (in bytes)
+
+ unsigned TupleSize = PtrSize * 2;
+
+ // Section 7.20 of the DWARF spec requires the table to be aligned to a tuple.
+ unsigned Padding =
+ OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize);
+
+ ContentSize += Padding;
+ ContentSize += (List.size() + 1) * TupleSize;
+
+ // For each compile unit, write the list of spans it covers.
+ Asm->OutStreamer->AddComment("Length of ARange Set");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer->AddComment("DWARF Arange version number");
+ Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
+ Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
+ Asm->emitDwarfSymbolReference(CU->getLabelBegin());
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(PtrSize);
+ Asm->OutStreamer->AddComment("Segment Size (in bytes)");
+ Asm->EmitInt8(0);
+
+ Asm->OutStreamer->emitFill(Padding, 0xff);
+
+ for (const ArangeSpan &Span : List) {
+ Asm->EmitLabelReference(Span.Start, PtrSize);
+
+ // Calculate the size as the distance from the span start to its end.
+ if (Span.End) {
+ Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize);
+ } else {
+ // For symbols without an end marker (e.g. common), we
+ // write a single arange entry containing just that one symbol.
+ uint64_t Size = SymSize[Span.Start];
+ if (Size == 0)
+ Size = 1;
+
+ Asm->OutStreamer->EmitIntValue(Size, PtrSize);
+ }
+ }
+
+ Asm->OutStreamer->AddComment("ARange terminator");
+ Asm->OutStreamer->EmitIntValue(0, PtrSize);
+ Asm->OutStreamer->EmitIntValue(0, PtrSize);
+ }
+}
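The span-building logic above can be summarized by a standalone sketch (hypothetical names; plain offsets stand in for symbols) that turns section-sorted labels into one span per maximal run of labels belonging to the same CU, with the final span running to the end of the section.

#include <cstddef>
#include <utility>
#include <vector>

struct Span { std::size_t Start, End; };

// Labels is sorted by offset within one section and carries the id of the CU
// each label belongs to. A new span starts whenever the CU changes; the last
// span ends at the section end, mirroring the terminator label pushed above.
std::vector<std::pair<unsigned, Span>>
buildSpans(const std::vector<std::pair<std::size_t, unsigned>> &Labels,
           std::size_t SectionEnd) {
  std::vector<std::pair<unsigned, Span>> Spans;
  if (Labels.empty())
    return Spans;
  std::size_t Start = Labels.front().first;
  for (std::size_t I = 1; I < Labels.size(); ++I) {
    if (Labels[I].second != Labels[I - 1].second) {
      Spans.push_back({Labels[I - 1].second, Span{Start, Labels[I].first}});
      Start = Labels[I].first;
    }
  }
  Spans.push_back({Labels.back().second, Span{Start, SectionEnd}});
  return Spans;
}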
+
+/// Emit address ranges into a debug ranges section.
+void DwarfDebug::emitDebugRanges() {
+ // Start the dwarf ranges section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfRangesSection());
+
+ // Size for our labels.
+ unsigned char Size = Asm->getDataLayout().getPointerSize();
+
+ // Grab the specific ranges for the compile units in the module.
+ for (const auto &I : CUMap) {
+ DwarfCompileUnit *TheCU = I.second;
+
+ if (auto *Skel = TheCU->getSkeleton())
+ TheCU = Skel;
+
+ // Iterate over the misc ranges for the compile units in the module.
+ for (const RangeSpanList &List : TheCU->getRangeLists()) {
+ // Emit our symbol so we can find the beginning of the range.
+ Asm->OutStreamer->EmitLabel(List.getSym());
+
+ for (const RangeSpan &Range : List.getRanges()) {
+ const MCSymbol *Begin = Range.getStart();
+ const MCSymbol *End = Range.getEnd();
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
+ if (auto *Base = TheCU->getBaseAddress()) {
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
+ } else {
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->EmitSymbolValue(End, Size);
+ }
+ }
+
+ // And terminate the list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+ }
+}
+
+void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
+ for (auto *MN : Nodes) {
+ if (auto *M = dyn_cast<DIMacro>(MN))
+ emitMacro(*M);
+ else if (auto *F = dyn_cast<DIMacroFile>(MN))
+ emitMacroFile(*F, U);
+ else
+ llvm_unreachable("Unexpected DI type!");
+ }
+}
+
+void DwarfDebug::emitMacro(DIMacro &M) {
+ Asm->EmitULEB128(M.getMacinfoType());
+ Asm->EmitULEB128(M.getLine());
+ StringRef Name = M.getName();
+ StringRef Value = M.getValue();
+ Asm->OutStreamer->EmitBytes(Name);
+ if (!Value.empty()) {
+ // There should be one space between macro name and macro value.
+ Asm->EmitInt8(' ');
+ Asm->OutStreamer->EmitBytes(Value);
+ }
+ Asm->EmitInt8('\0');
+}
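For reference, a minimal sketch of how one macinfo record produced by emitMacro is laid out, assuming a plain byte buffer in place of the streamer (helper names are illustrative only).

#include <cstdint>
#include <string>
#include <vector>

// Append X as ULEB128.
void appendULEB128(std::vector<uint8_t> &Out, uint64_t X) {
  do {
    uint8_t Byte = X & 0x7f;
    X >>= 7;
    Out.push_back(X ? uint8_t(Byte | 0x80) : Byte);
  } while (X);
}

// One define/undef record: type, line, "name[ value]", then a NUL terminator,
// with a single space separating the name from an optional value.
void appendMacro(std::vector<uint8_t> &Out, uint64_t Type, uint64_t Line,
                 const std::string &Name, const std::string &Value) {
  appendULEB128(Out, Type);
  appendULEB128(Out, Line);
  Out.insert(Out.end(), Name.begin(), Name.end());
  if (!Value.empty()) {
    Out.push_back(' ');
    Out.insert(Out.end(), Value.begin(), Value.end());
  }
  Out.push_back('\0');
}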
+
+void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
+ assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
+ Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);
+ Asm->EmitULEB128(F.getLine());
+ DIFile *File = F.getFile();
+ unsigned FID =
+ U.getOrCreateSourceID(File->getFilename(), File->getDirectory());
+ Asm->EmitULEB128(FID);
+ handleMacroNodes(F.getElements(), U);
+ Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
+}
+
+/// Emit macros into a debug macinfo section.
+void DwarfDebug::emitDebugMacinfo() {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfMacinfoSection());
+
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
+ auto *SkCU = TheCU.getSkeleton();
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
+ handleMacroNodes(CUNode->getMacros(), U);
+ }
+ Asm->OutStreamer->AddComment("End Of Macro List Mark");
+ Asm->EmitInt8(0);
+}
+
+// DWARF5 Experimental Separate Dwarf emitters.
+
+void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+ std::unique_ptr<DwarfCompileUnit> NewU) {
+ NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
+ U.getCUNode()->getSplitDebugFilename());
+
+ if (!CompilationDir.empty())
+ NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+ addGnuPubAttributes(*NewU, Die);
+
+ SkeletonHolder.addUnit(std::move(NewU));
+}
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_addr_base, DW_AT_ranges_base.
+DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
+
+ auto OwnedUnit = make_unique<DwarfCompileUnit>(
+ CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+
+ NewCU.initStmtList();
+
+ initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
+
+ return NewCU;
+}
+
+// Emit the .debug_info.dwo section for separated dwarf. This contains the
+// compile units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+ assert(useSplitDwarf() && "No split dwarf debug info?");
+ // Don't emit relocations into the dwo file.
+ InfoHolder.emitUnits(/* UseOffsets */ true);
+}
+
+// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
+// abbreviations for the .debug_info.dwo section.
+void DwarfDebug::emitDebugAbbrevDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
+}
+
+void DwarfDebug::emitDebugLineDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLineDWOSection());
+ SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
+}
+
+// Emit the .debug_str.dwo section for separated dwarf. This contains the
+// string section and is identical in format to traditional .debug_str
+// sections.
+void DwarfDebug::emitDebugStrDWO() {
+ assert(useSplitDwarf() && "No split dwarf?");
+ MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
+ InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
+ OffSec);
+}
+
+MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
+ if (!useSplitDwarf())
+ return nullptr;
+ if (SingleCU)
+ SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory());
+ return &SplitTypeUnitFileTable;
+}
+
+uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
+ MD5 Hash;
+ Hash.update(Identifier);
+ // ... take the least significant 8 bytes and return those. Our MD5
+ // implementation always returns its results in little-endian order, so read
+ // the bytes accordingly.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+ return support::endian::read64le(Result + 8);
+}
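A standalone sketch of the same computation, assuming a raw 16-byte MD5 digest is already in hand (this is not the LLVM MD5 API, just an illustration of the little-endian read of bytes 8..15).

#include <cstdint>

// Interpret bytes 8..15 of a 16-byte MD5 digest as a little-endian 64-bit
// value, which is what read64le(Result + 8) does above.
uint64_t signatureFromDigest(const uint8_t Digest[16]) {
  uint64_t Sig = 0;
  for (int I = 7; I >= 0; --I)
    Sig = (Sig << 8) | Digest[8 + I];
  return Sig;
}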
+
+void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
+ StringRef Identifier, DIE &RefDie,
+ const DICompositeType *CTy) {
+ // Fast path: if we're already building some type units and one of them has
+ // used the address pool, we know we're going to throw away all this work
+ // anyway, so don't bother building dependent types.
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
+ return;
+
+ auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
+ if (!Ins.second) {
+ CU.addDIETypeSignature(RefDie, Ins.first->second);
+ return;
+ }
+
+ bool TopLevelType = TypeUnitsUnderConstruction.empty();
+ AddrPool.resetUsedFlag();
+
+ auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+ getDwoLineTable(CU));
+ DwarfTypeUnit &NewTU = *OwnedUnit;
+ DIE &UnitDie = NewTU.getUnitDie();
+ TypeUnitsUnderConstruction.push_back(
+ std::make_pair(std::move(OwnedUnit), CTy));
+
+ NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ CU.getLanguage());
+
+ uint64_t Signature = makeTypeSignature(Identifier);
+ NewTU.setTypeSignature(Signature);
+ Ins.first->second = Signature;
+
+ if (useSplitDwarf())
+ NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
+ else {
+ CU.applyStmtList(UnitDie);
+ NewTU.initSection(
+ Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ }
+
+ NewTU.setType(NewTU.createTypeDIE(CTy));
+
+ if (TopLevelType) {
+ auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
+ TypeUnitsUnderConstruction.clear();
+
+ // Types referencing entries in the address table cannot be placed in type
+ // units.
+ if (AddrPool.hasBeenUsed()) {
+
+ // Remove all the types built while building this type.
+ // This is pessimistic as some of these types might not be dependent on
+ // the type that used an address.
+ for (const auto &TU : TypeUnitsToAdd)
+ TypeSignatures.erase(TU.second);
+
+ // Construct this type in the CU directly.
+ // This is inefficient because all the dependent types will be rebuilt
+ // from scratch, including building them in type units, discovering that
+ // they depend on addresses, throwing them out and rebuilding them.
+ CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy));
+ return;
+ }
+
+ // If the type wasn't dependent on fission addresses, finish adding the type
+ // and all its dependent types.
+ for (auto &TU : TypeUnitsToAdd) {
+ InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
+ InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
+ }
+ }
+ CU.addDIETypeSignature(RefDie, Signature);
+}
+
+// Accelerator table mutators - add each name along with its companion
+// DIE to the proper table while ensuring that the name that we're going
+// to reference is in the string table. We do this since the names we
+// add may not be identical to the names in the DIE.
+void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+}
+
+void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+}
+
+void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+}
+
+void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 000000000000..6b06757628b6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,571 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
+
+#include "DbgValueHistoryCalculator.h"
+#include "DebugHandlerBase.h"
+#include "DebugLocStream.h"
+#include "DwarfAccelTable.h"
+#include "DwarfFile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+
+namespace llvm {
+
+class AsmPrinter;
+class ByteStreamer;
+class ConstantInt;
+class ConstantFP;
+class DebugLocEntry;
+class DwarfCompileUnit;
+class DwarfDebug;
+class DwarfTypeUnit;
+class DwarfUnit;
+class MachineModuleInfo;
+
+//===----------------------------------------------------------------------===//
+/// This class is used to track local variable information.
+///
+/// Variables can be created from allocas, in which case they're generated from
+/// the MMI table. Such variables can have multiple expressions and frame
+/// indices. The \a Expr and \a FrameIndex arrays must match.
+///
+/// Variables can be created from \c DBG_VALUE instructions. Those whose
+/// location changes over time use \a DebugLocListIndex, while those with a
+/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
+///
+/// Variables that have been optimized out use none of these fields.
+class DbgVariable {
+ const DILocalVariable *Var; /// Variable Descriptor.
+ const DILocation *IA; /// Inlined at location.
+ SmallVector<const DIExpression *, 1> Expr; /// Complex address.
+ DIE *TheDIE = nullptr; /// Variable DIE.
+ unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
+ const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
+ SmallVector<int, 1> FrameIndex; /// Frame index.
+
+public:
+ /// Construct a DbgVariable.
+ ///
+ /// Creates a variable without any DW_AT_location. Call \a initializeMMI()
+ /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions.
+ DbgVariable(const DILocalVariable *V, const DILocation *IA)
+ : Var(V), IA(IA) {}
+
+ /// Initialize from the MMI table.
+ void initializeMMI(const DIExpression *E, int FI) {
+ assert(Expr.empty() && "Already initialized?");
+ assert(FrameIndex.empty() && "Already initialized?");
+ assert(!MInsn && "Already initialized?");
+
+ assert((!E || E->isValid()) && "Expected valid expression");
+ assert(~FI && "Expected valid index");
+
+ Expr.push_back(E);
+ FrameIndex.push_back(FI);
+ }
+
+ /// Initialize from a DBG_VALUE instruction.
+ void initializeDbgValue(const MachineInstr *DbgValue) {
+ assert(Expr.empty() && "Already initialized?");
+ assert(FrameIndex.empty() && "Already initialized?");
+ assert(!MInsn && "Already initialized?");
+
+ assert(Var == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at");
+
+ MInsn = DbgValue;
+ if (auto *E = DbgValue->getDebugExpression())
+ if (E->getNumElements())
+ Expr.push_back(E);
+ }
+
+ // Accessors.
+ const DILocalVariable *getVariable() const { return Var; }
+ const DILocation *getInlinedAt() const { return IA; }
+ ArrayRef<const DIExpression *> getExpression() const { return Expr; }
+ const DIExpression *getSingleExpression() const {
+ assert(MInsn && Expr.size() <= 1);
+ return Expr.size() ? Expr[0] : nullptr;
+ }
+ void setDIE(DIE &D) { TheDIE = &D; }
+ DIE *getDIE() const { return TheDIE; }
+ void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
+ unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+ StringRef getName() const { return Var->getName(); }
+ const MachineInstr *getMInsn() const { return MInsn; }
+ ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+
+ void addMMIEntry(const DbgVariable &V) {
+ assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
+ assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+ assert(V.Var == Var && "conflicting variable");
+ assert(V.IA == IA && "conflicting inlined-at location");
+
+ assert(!FrameIndex.empty() && "Expected an MMI entry");
+ assert(!V.FrameIndex.empty() && "Expected an MMI entry");
+ assert(Expr.size() == FrameIndex.size() && "Mismatched expressions");
+ assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions");
+
+ Expr.append(V.Expr.begin(), V.Expr.end());
+ FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end());
+ assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) {
+ return E && E->isBitPiece();
+ }) && "conflicting locations for variable");
+ }
+
+ // Translate tag to proper Dwarf tag.
+ dwarf::Tag getTag() const {
+ // FIXME: Why don't we just infer this tag and store it all along?
+ if (Var->isParameter())
+ return dwarf::DW_TAG_formal_parameter;
+
+ return dwarf::DW_TAG_variable;
+ }
+ /// Return true if DbgVariable is artificial.
+ bool isArtificial() const {
+ if (Var->isArtificial())
+ return true;
+ if (getType()->isArtificial())
+ return true;
+ return false;
+ }
+
+ bool isObjectPointer() const {
+ if (Var->isObjectPointer())
+ return true;
+ if (getType()->isObjectPointer())
+ return true;
+ return false;
+ }
+
+ bool hasComplexAddress() const {
+ assert(MInsn && "Expected DBG_VALUE, not MMI variable");
+ assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable");
+ assert(
+ (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) &&
+ "Invalid Expr for DBG_VALUE");
+ return !Expr.empty();
+ }
+ bool isBlockByrefVariable() const;
+ const DIType *getType() const;
+
+private:
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
+};
+
+
+/// Helper used to pair up a symbol and its DWARF compile unit.
+struct SymbolCU {
+ SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+ const MCSymbol *Sym;
+ DwarfCompileUnit *CU;
+};
+
+/// Collects and handles dwarf debug information.
+class DwarfDebug : public DebugHandlerBase {
+ /// All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ /// Maps MDNode with its corresponding DwarfCompileUnit.
+ MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
+
+ /// Maps a CU DIE with its corresponding DwarfCompileUnit.
+ DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
+
+ /// List of all labels used in aranges generation.
+ std::vector<SymbolCU> ArangeLabels;
+
+ /// Size of each symbol emitted (for those symbols that have a specific size).
+ DenseMap<const MCSymbol *, uint64_t> SymSize;
+
+ /// Collection of abstract variables.
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
+
+ /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
+ /// can refer to them in spite of insertions into this list.
+ DebugLocStream DebugLocs;
+
+ /// This is a collection of subprogram MDNodes that are processed to
+ /// create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+ /// If nonnull, stores the current machine function we're processing.
+ const MachineFunction *CurFn;
+
+ /// If nonnull, stores the CU in which the previous subprogram was contained.
+ const DwarfCompileUnit *PrevCU;
+
+ /// As an optimization, there is no need to emit an entry in the directory
+ /// table for the same directory as DW_AT_comp_dir.
+ StringRef CompilationDir;
+
+ /// Holder for the file specific debug information.
+ DwarfFile InfoHolder;
+
+ /// Holders for the various debug information flags that we might need to
+ /// have exposed. See accessor functions below for description.
+
+ /// Map from MDNodes for user-defined types to their type signatures. Also
+ /// used to keep track of which types we have emitted type units for.
+ DenseMap<const MDNode *, uint64_t> TypeSignatures;
+
+ SmallVector<
+ std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1>
+ TypeUnitsUnderConstruction;
+
+ /// Whether to emit the pubnames/pubtypes sections.
+ bool HasDwarfPubSections;
+
+ /// Whether to use the GNU TLS opcode (instead of the standard opcode).
+ bool UseGNUTLSOpcode;
+
+ /// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format).
+ bool UseDWARF2Bitfields;
+
+ /// Whether to emit all linkage names, or just abstract subprograms.
+ bool UseAllLinkageNames;
+
+ /// Version of dwarf we're emitting.
+ unsigned DwarfVersion;
+
+ /// DWARF5 Experimental Options
+ /// @{
+ bool HasDwarfAccelTables;
+ bool HasAppleExtensionAttributes;
+ bool HasSplitDwarf;
+
+ /// Separated Dwarf Variables
+ /// In general these will all be for bits that are left in the
+ /// original object file, rather than things that are meant
+ /// to be in the .dwo sections.
+
+ /// Holder for the skeleton information.
+ DwarfFile SkeletonHolder;
+
+ /// Store file names for type units under fission in a line table
+ /// header that will be emitted into debug_line.dwo.
+ // FIXME: replace this with a map from comp_dir to table so that we
+ // can emit multiple tables during LTO each of which uses directory
+ // 0, referencing the comp_dir of all the type units that use it.
+ MCDwarfDwoLineTable SplitTypeUnitFileTable;
+ /// @}
+
+ /// True iff this module contains only a single CU.
+ bool SingleCU;
+ bool IsDarwin;
+
+ AddressPool AddrPool;
+
+ DwarfAccelTable AccelNames;
+ DwarfAccelTable AccelObjC;
+ DwarfAccelTable AccelNamespace;
+ DwarfAccelTable AccelTypes;
+
+ // Identify a debugger for "tuning" the debug info.
+ DebuggerKind DebuggerTuning;
+
+ /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+ ///
+ /// Returns whether we are "tuning" for a given debugger.
+ /// Should be used only within the constructor, to set feature flags.
+ /// @{
+ bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+ bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+ bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+ /// @}
+
+ MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
+
+ const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
+ return InfoHolder.getUnits();
+ }
+
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+
+ /// Find abstract variable associated with Var.
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
+ const DILocalVariable *&Cleansed);
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
+ void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
+ void ensureAbstractVariableIsCreated(InlinedVariable Var,
+ const MDNode *Scope);
+ void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var,
+ const MDNode *Scope);
+
+ DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV);
+
+ /// Construct a DIE for this abstract scope.
+ void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+
+ void finishVariableDefinitions();
+
+ void finishSubprogramDefinitions();
+
+ /// Finish off debug information after all functions have been
+ /// processed.
+ void finalizeModuleInfo();
+
+ /// Emit the debug info section.
+ void emitDebugInfo();
+
+ /// Emit the abbreviation section.
+ void emitAbbreviations();
+
+ /// Emit a specified accelerator table.
+ void emitAccel(DwarfAccelTable &Accel, MCSection *Section,
+ StringRef TableName);
+
+ /// Emit visible names into a hashed accelerator table section.
+ void emitAccelNames();
+
+ /// Emit objective C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// Emit namespace dies into a hashed accelerator table.
+ void emitAccelNamespaces();
+
+ /// Emit type dies into a hashed accelerator table.
+ void emitAccelTypes();
+
+ /// Emit visible names into a debug pubnames section.
+ /// \param GnuStyle determines whether or not we want to emit
+ /// additional information into the table a la newer gcc for gdb
+ /// index.
+ void emitDebugPubNames(bool GnuStyle = false);
+
+ /// Emit visible types into a debug pubtypes section.
+ /// \param GnuStyle determines whether or not we want to emit
+ /// additional information into the table a la newer gcc for gdb
+ /// index.
+ void emitDebugPubTypes(bool GnuStyle = false);
+
+ void emitDebugPubSection(
+ bool GnuStyle, MCSection *PSec, StringRef Name,
+ const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
+
+ /// Emit null-terminated strings into a debug str section.
+ void emitDebugStr();
+
+ /// Emit variable locations into a debug loc section.
+ void emitDebugLoc();
+
+ /// Emit variable locations into a debug loc dwo section.
+ void emitDebugLocDWO();
+
+ /// Emit address ranges into a debug aranges section.
+ void emitDebugARanges();
+
+ /// Emit address ranges into a debug ranges section.
+ void emitDebugRanges();
+
+ /// Emit macros into a debug macinfo section.
+ void emitDebugMacinfo();
+ void emitMacro(DIMacro &M);
+ void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
+ void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
+
+ /// DWARF 5 Experimental Split Dwarf Emitters
+
+ /// Initialize common features of skeleton units.
+ void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+ std::unique_ptr<DwarfCompileUnit> NewU);
+
+ /// Construct the split debug info compile unit for the debug info
+ /// section.
+ DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
+
+ /// Emit the debug info dwo section.
+ void emitDebugInfoDWO();
+
+ /// Emit the debug abbrev dwo section.
+ void emitDebugAbbrevDWO();
+
+ /// Emit the debug line dwo section.
+ void emitDebugLineDWO();
+
+ /// Emit the debug str dwo section.
+ void emitDebugStrDWO();
+
+ /// Flags to let the linker know we have emitted new style pubnames. Only
+ /// emit it here if we don't have a skeleton CU for split dwarf.
+ void addGnuPubAttributes(DwarfUnit &U, DIE &D) const;
+
+ /// Create new DwarfCompileUnit for the given metadata node with tag
+ /// DW_TAG_compile_unit.
+ DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit);
+
+ /// Construct imported_module or imported_declaration DIE.
+ void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const DIImportedEntity *N);
+
+ /// Register a source line with debug info. Returns the unique
+ /// label that was emitted and which provides correspondence to the
+ /// source line list.
+ void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+ unsigned Flags);
+
+ /// Populate LexicalScope entries with variables' info.
+ void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+ DenseSet<InlinedVariable> &ProcessedVars);
+
+ /// Build the location list for all DBG_VALUEs in the
+ /// function that describe the same variable.
+ void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges);
+
+ /// Collect variable information from the side table maintained
+ /// by MMI.
+ void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfDebug(AsmPrinter *A, Module *M);
+
+ ~DwarfDebug() override;
+
+ /// Emit all Dwarf sections that should come prior to the
+ /// content.
+ void beginModule();
+
+ /// Emit all Dwarf sections that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function debug information.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function debug information.
+ void endFunction(const MachineFunction *MF) override;
+
+ /// Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI) override;
+
+ /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
+ static uint64_t makeTypeSignature(StringRef Identifier);
+
+ /// Add a DIE to the set of types that we're going to pull into
+ /// type units.
+ void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
+ DIE &Die, const DICompositeType *CTy);
+
+ /// Add a label so that arange data can be generated for it.
+ void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
+
+ /// For symbols that have a size designated (e.g. common symbols),
+ /// this tracks that size.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {
+ SymSize[Sym] = Size;
+ }
+
+ /// Returns whether we should emit all DW_AT_[MIPS_]linkage_name.
+ /// If not, we still might emit certain cases.
+ bool useAllLinkageNames() const { return UseAllLinkageNames; }
+
+ /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
+ /// standard DW_OP_form_tls_address opcode.
+ bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
+
+ /// Returns whether to use the DWARF2 format for bitfields instead of the
+ /// DWARF4 format.
+ bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; }
+
+ // Experimental DWARF5 features.
+
+ /// Returns whether or not to emit tables that dwarf consumers can
+ /// use to accelerate lookup.
+ bool useDwarfAccelTables() const { return HasDwarfAccelTables; }
+
+ bool useAppleExtensionAttributes() const {
+ return HasAppleExtensionAttributes;
+ }
+
+ /// Returns whether or not the debug info should be emitted according to
+ /// the split dwarf proposal.
+ bool useSplitDwarf() const { return HasSplitDwarf; }
+
+ /// Returns the Dwarf Version.
+ unsigned getDwarfVersion() const { return DwarfVersion; }
+
+ /// Returns the previous CU that was being updated.
+ const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
+ void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
+
+ /// Returns the entries for the .debug_loc section.
+ const DebugLocStream &getDebugLocs() const { return DebugLocs; }
+
+ /// Emit an entry for the debug loc section. This can be used to
+ /// handle an entry that's going to be emitted into the debug loc section.
+ void emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocStream::Entry &Entry);
+
+ /// Emit the location for a debug loc entry, including the size header.
+ void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry);
+
+ /// Find the MDNode for the given reference.
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
+
+ /// Find the DwarfCompileUnit for the given CU Die.
+ DwarfCompileUnit *lookupUnit(const DIE *CU) const {
+ return CUDieMap.lookup(CU);
+ }
+
+ void addSubprogramNames(const DISubprogram *SP, DIE &Die);
+
+ AddressPool &getAddressPool() { return AddrPool; }
+
+ void addAccelName(StringRef Name, const DIE &Die);
+
+ void addAccelObjC(StringRef Name, const DIE &Die);
+
+ void addAccelNamespace(StringRef Name, const DIE &Die);
+
+ void addAccelType(StringRef Name, const DIE &Die, char Flags);
+
+ const MachineFunction *getCurrentFunction() const { return CurFn; }
+
+ /// A helper function to check whether the DIE for a given Scope is
+ /// going to be null.
+ bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+ // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
+
+ SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
+ return ProcessedSPNodes;
+ }
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 000000000000..8287f289f22b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,95 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "EHStreamer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCDwarf.h"
+
+namespace llvm {
+class MachineFunction;
+class ARMTargetStreamer;
+
+class LLVM_LIBRARY_VISIBILITY DwarfCFIExceptionBase : public EHStreamer {
+protected:
+ DwarfCFIExceptionBase(AsmPrinter *A);
+
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI;
+
+ void markFunctionEnd() override;
+ void endFragment() override;
+};
+
+class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
+ /// Per-function flag to indicate if .cfi_personality should be emitted.
+ bool shouldEmitPersonality;
+
+ /// Per-function flag to indicate if .cfi_personality must be emitted.
+ bool forceEmitPersonality;
+
+ /// Per-function flag to indicate if .cfi_lsda should be emitted.
+ bool shouldEmitLSDA;
+
+ /// Per-function flag to indicate if frame moves info should be emitted.
+ bool shouldEmitMoves;
+
+ AsmPrinter::CFIMoveType moveTypeModule;
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ ~DwarfCFIException() override;
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ void beginFragment(const MachineBasicBlock *MBB,
+ ExceptionSymbolProvider ESP) override;
+};
+
+class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase {
+ void emitTypeInfos(unsigned TTypeEncoding) override;
+ ARMTargetStreamer &getTargetStreamer();
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ ARMException(AsmPrinter *A);
+ ~ARMException() override;
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+};
+} // End of namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
new file mode 100644
index 000000000000..7dbc6cb39951
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -0,0 +1,285 @@
+//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfExpression.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ if (DwarfReg < 32) {
+ EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ } else {
+ EmitOp(dwarf::DW_OP_regx, Comment);
+ EmitUnsigned(DwarfReg);
+ }
+}
+
+void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ if (DwarfReg < 32) {
+ EmitOp(dwarf::DW_OP_breg0 + DwarfReg);
+ } else {
+ EmitOp(dwarf::DW_OP_bregx);
+ EmitUnsigned(DwarfReg);
+ }
+ EmitSigned(Offset);
+ if (Deref)
+ EmitOp(dwarf::DW_OP_deref);
+}
+
+void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ assert(SizeInBits > 0 && "piece has size zero");
+ const unsigned SizeOfByte = 8;
+ if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
+ EmitOp(dwarf::DW_OP_bit_piece);
+ EmitUnsigned(SizeInBits);
+ EmitUnsigned(OffsetInBits);
+ } else {
+ EmitOp(dwarf::DW_OP_piece);
+ unsigned ByteSize = SizeInBits / SizeOfByte;
+ EmitUnsigned(ByteSize);
+ }
+}
+
+void DwarfExpression::AddShr(unsigned ShiftBy) {
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(ShiftBy);
+ EmitOp(dwarf::DW_OP_shr);
+}
+
+bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
+ unsigned MachineReg, int Offset) {
+ if (isFrameRegister(TRI, MachineReg)) {
+ // If the variable's offset is relative to the frame register, use DW_OP_fbreg.
+ EmitOp(dwarf::DW_OP_fbreg);
+ EmitSigned(Offset);
+ return true;
+ }
+
+ int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
+ if (DwarfReg < 0)
+ return false;
+
+ AddRegIndirect(DwarfReg, Offset);
+ return true;
+}
+
+bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
+ unsigned MachineReg,
+ unsigned PieceSizeInBits,
+ unsigned PieceOffsetInBits) {
+ if (!TRI.isPhysicalRegister(MachineReg))
+ return false;
+
+ int Reg = TRI.getDwarfRegNum(MachineReg, false);
+
+ // If this is a valid register number, emit it.
+ if (Reg >= 0) {
+ AddReg(Reg);
+ if (PieceSizeInBits)
+ AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
+ return true;
+ }
+
+ // Walk up the super-register chain until we find a valid number.
+ // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
+ for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ Reg = TRI.getDwarfRegNum(*SR, false);
+ if (Reg >= 0) {
+ unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
+ AddReg(Reg, "super-register");
+ if (PieceOffsetInBits == RegOffset) {
+ AddOpPiece(Size, RegOffset);
+ } else {
+ // If this is part of a variable in a sub-register at a
+ // non-zero offset, we need to manually shift the value into
+ // place, since the DW_OP_piece describes the part of the
+ // variable, not the position of the subregister.
+ if (RegOffset)
+ AddShr(RegOffset);
+ AddOpPiece(Size, PieceOffsetInBits);
+ }
+ return true;
+ }
+ }
+
+ // Otherwise, attempt to find a covering set of sub-register numbers.
+ // For example, Q0 on ARM is a composition of D0+D1.
+ //
+ // Keep track of the current position so we can emit the more
+ // efficient DW_OP_piece.
+ unsigned CurPos = PieceOffsetInBits;
+ // The size of the register in bits, assuming 8 bits per byte.
+ unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ // Keep track of the bits in the register we already emitted, so we
+ // can avoid emitting redundant aliasing subregs.
+ SmallBitVector Coverage(RegSize, false);
+ for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned Offset = TRI.getSubRegIdxOffset(Idx);
+ Reg = TRI.getDwarfRegNum(*SR, false);
+
+ // Intersection between the bits we already emitted and the bits
+ // covered by this subregister.
+ SmallBitVector Intersection(RegSize, false);
+ Intersection.set(Offset, Offset + Size);
+ Intersection ^= Coverage;
+
+ // If this sub-register has a DWARF number and we haven't covered
+ // its range, emit a DWARF piece for it.
+ if (Reg >= 0 && Intersection.any()) {
+ AddReg(Reg, "sub-register");
+ AddOpPiece(Size, Offset == CurPos ? 0 : Offset);
+ CurPos = Offset + Size;
+
+ // Mark it as emitted.
+ Coverage.set(Offset, Offset + Size);
+ }
+ }
+
+ return CurPos > PieceOffsetInBits;
+}
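The intent of the coverage loop above can be sketched with standard containers only; the snippet below (hypothetical types, emission reduced to counting) keeps a bit vector of bits already described so that aliasing sub-registers are emitted at most once.

#include <vector>

struct SubReg { unsigned Offset, Size; bool HasDwarfNum; };

// Walk sub-registers and count how many piece operations would be emitted
// for bits not yet covered by an earlier sub-register.
unsigned countSubRegPieces(const std::vector<SubReg> &Subs, unsigned RegSize) {
  std::vector<bool> Coverage(RegSize, false);
  unsigned Pieces = 0;
  for (const SubReg &S : Subs) {
    bool Uncovered = false;
    for (unsigned B = S.Offset; B < S.Offset + S.Size && B < RegSize; ++B)
      Uncovered = Uncovered || !Coverage[B];
    if (S.HasDwarfNum && Uncovered) {
      ++Pieces; // the real code emits the sub-register number plus a piece op
      for (unsigned B = S.Offset; B < S.Offset + S.Size && B < RegSize; ++B)
        Coverage[B] = true;
    }
  }
  return Pieces;
}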
+
+void DwarfExpression::AddStackValue() {
+ if (DwarfVersion >= 4)
+ EmitOp(dwarf::DW_OP_stack_value);
+}
+
+void DwarfExpression::AddSignedConstant(int64_t Value) {
+ EmitOp(dwarf::DW_OP_consts);
+ EmitSigned(Value);
+ AddStackValue();
+}
+
+void DwarfExpression::AddUnsignedConstant(uint64_t Value) {
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(Value);
+ AddStackValue();
+}
+
+void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
+ unsigned Size = Value.getBitWidth();
+ const uint64_t *Data = Value.getRawData();
+
+ // Chop it up into 64-bit pieces, because that's the maximum that
+ // AddUnsignedConstant takes.
+ unsigned Offset = 0;
+ while (Offset < Size) {
+ AddUnsignedConstant(*Data++);
+ if (Offset == 0 && Size <= 64)
+ break;
+ AddOpPiece(std::min(Size-Offset, 64u), Offset);
+ Offset += 64;
+ }
+}
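A minimal sketch of the chopping strategy, with the emission calls replaced by a list of (value, size, offset) chunks; in the real loop a single chunk that already fits in 64 bits skips the piece operation.

#include <algorithm>
#include <cstdint>
#include <vector>

struct Chunk { uint64_t Value; unsigned SizeInBits, OffsetInBits; };

// Split a multi-word constant into 64-bit chunks; each chunk corresponds to
// one DW_OP_constu, followed by a piece op covering its bits.
std::vector<Chunk> chopConstant(const std::vector<uint64_t> &Words,
                                unsigned BitWidth) {
  std::vector<Chunk> Chunks;
  unsigned Offset = 0;
  for (uint64_t W : Words) {
    if (Offset >= BitWidth)
      break;
    Chunks.push_back({W, std::min(BitWidth - Offset, 64u), Offset});
    Offset += 64;
  }
  return Chunks;
}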
+
+static unsigned getOffsetOrZero(unsigned OffsetInBits,
+ unsigned PieceOffsetInBits) {
+ if (OffsetInBits == PieceOffsetInBits)
+ return 0;
+ assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces");
+ return OffsetInBits;
+}
+
+bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ const DIExpression *Expr,
+ unsigned MachineReg,
+ unsigned PieceOffsetInBits) {
+ auto I = Expr->expr_op_begin();
+ auto E = Expr->expr_op_end();
+ if (I == E)
+ return AddMachineRegPiece(TRI, MachineReg);
+
+ // Pattern-match combinations for which more efficient representations exist
+ // first.
+ bool ValidReg = false;
+ switch (I->getOp()) {
+ case dwarf::DW_OP_bit_piece: {
+ unsigned OffsetInBits = I->getArg(0);
+ unsigned SizeInBits = I->getArg(1);
+ // Piece always comes at the end of the expression.
+ return AddMachineRegPiece(TRI, MachineReg, SizeInBits,
+ getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ }
+ case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_minus: {
+ // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
+ // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
+ auto N = I.getNext();
+ if (N != E && N->getOp() == dwarf::DW_OP_deref) {
+ unsigned Offset = I->getArg(0);
+ ValidReg = AddMachineRegIndirect(
+ TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
+ std::advance(I, 2);
+ break;
+ } else
+ ValidReg = AddMachineRegPiece(TRI, MachineReg);
+ }
+ case dwarf::DW_OP_deref: {
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(TRI, MachineReg);
+ ++I;
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported operand");
+ }
+
+ if (!ValidReg)
+ return false;
+
+ // Emit remaining elements of the expression.
+ AddExpression(I, E, PieceOffsetInBits);
+ return true;
+}
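The peephole at the top of this function can be illustrated in isolation. The standalone snippet below (hypothetical types) only checks whether a leading plus/minus operand followed by a deref collapses into a base-register-plus-offset form.

#include <cstdint>
#include <utility>
#include <vector>

enum Op { Plus, Minus, Deref, BitPiece };

// Returns true if the expression starts with {plus|minus <offset>, deref},
// in which case the two operations are consumed and the location becomes a
// single register+offset (DW_OP_breg-style) operand.
bool matchBregPattern(const std::vector<std::pair<Op, int64_t>> &Ops,
                      int64_t &Offset) {
  if (Ops.size() < 2 || Ops[1].first != Deref)
    return false;
  if (Ops[0].first == Plus) {
    Offset = Ops[0].second;
    return true;
  }
  if (Ops[0].first == Minus) {
    Offset = -Ops[0].second;
    return true;
  }
  return false;
}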
+
+void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
+ DIExpression::expr_op_iterator E,
+ unsigned PieceOffsetInBits) {
+ for (; I != E; ++I) {
+ switch (I->getOp()) {
+ case dwarf::DW_OP_bit_piece: {
+ unsigned OffsetInBits = I->getArg(0);
+ unsigned SizeInBits = I->getArg(1);
+ AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ break;
+ }
+ case dwarf::DW_OP_plus:
+ EmitOp(dwarf::DW_OP_plus_uconst);
+ EmitUnsigned(I->getArg(0));
+ break;
+ case dwarf::DW_OP_minus:
+ // There is no OP_minus_uconst.
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(I->getArg(0));
+ EmitOp(dwarf::DW_OP_minus);
+ break;
+ case dwarf::DW_OP_deref:
+ EmitOp(dwarf::DW_OP_deref);
+ break;
+ default:
+ llvm_unreachable("unhandled opcode found in expression");
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
new file mode 100644
index 000000000000..5fff28d8a13c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -0,0 +1,152 @@
+//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Expression ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing DWARF expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class ByteStreamer;
+class TargetRegisterInfo;
+class DwarfUnit;
+class DIELoc;
+
+/// Base class containing the logic for constructing DWARF expressions
+/// independently of whether they are emitted into a DIE or into a .debug_loc
+/// entry.
+class DwarfExpression {
+protected:
+ // Various convenience accessors that extract things out of AsmPrinter.
+ unsigned DwarfVersion;
+
+public:
+ DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
+ virtual ~DwarfExpression() {}
+
+ /// Output a dwarf operand and an optional assembler comment.
+ virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+ /// Emit a raw signed value.
+ virtual void EmitSigned(int64_t Value) = 0;
+ /// Emit a raw unsigned value.
+ virtual void EmitUnsigned(uint64_t Value) = 0;
+ /// Return whether the given machine register is the frame register in the
+ /// current function.
+ virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
+
+ /// Emit a dwarf register operation.
+ void AddReg(int DwarfReg, const char *Comment = nullptr);
+ /// Emit a (double-)indirect dwarf register operation.
+ void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+
+ /// Emit a dwarf register operation for describing
+ /// - a small value occupying only part of a register or
+ /// - a register representing only part of a value.
+ void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+ /// Emit a shift-right dwarf expression.
+ void AddShr(unsigned ShiftBy);
+ /// Emit a DW_OP_stack_value, if supported.
+ ///
+ /// The proper way to describe a constant value is
+ /// DW_OP_constu <const>, DW_OP_stack_value.
+ /// Unfortunately, DW_OP_stack_value was not available until DWARF-4,
+ /// so we will continue to generate DW_OP_constu <const> for DWARF-2
+ /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
+ /// actually describes a value at a constant address, not a constant value.
+ /// However, in the past there was no better way to describe a constant
+ /// value, so the producers and consumers started to rely on heuristics
+ /// to disambiguate the value vs. location status of the expression.
+ /// See PR21176 for more details.
+ void AddStackValue();
+
+ /// Emit an indirect dwarf register operation for the given machine register.
+ /// \return false if no DWARF register exists for MachineReg.
+ bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ int Offset = 0);
+
+ /// \brief Emit a partial DWARF register operation.
+ /// \param MachineReg the register
+ /// \param PieceSizeInBits size and
+ /// \param PieceOffsetInBits offset of the piece in bits, if this is one
+ /// piece of an aggregate value.
+ ///
+ /// If size and offset are zero, an operation for the entire
+ /// register is emitted: some targets do not provide a DWARF
+ /// register number for every register. If this is the case, this
+ /// function will attempt to emit a DWARF register by emitting a
+ /// piece of a super-register or by piecing together multiple
+ /// subregisters that alias the register.
+ ///
+ /// \return false if no DWARF register exists for MachineReg.
+ bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned PieceSizeInBits = 0,
+ unsigned PieceOffsetInBits = 0);
+
+ /// Emit a signed constant.
+ void AddSignedConstant(int64_t Value);
+ /// Emit an unsigned constant.
+ void AddUnsignedConstant(uint64_t Value);
+ /// Emit an unsigned constant.
+ void AddUnsignedConstant(const APInt &Value);
+
+ /// \brief Emit an entire expression on top of a machine register location.
+ ///
+ /// \param PieceOffsetInBits If this is one piece out of a fragmented
+ /// location, this is the offset of the piece inside the entire variable.
+ /// \return false if no DWARF register exists for MachineReg.
+ bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ const DIExpression *Expr, unsigned MachineReg,
+ unsigned PieceOffsetInBits = 0);
+ /// Emit the remaining operations in the DIExpression iterator range [I, E).
+ /// \param PieceOffsetInBits If this is one piece out of a fragmented
+ /// location, this is the offset of the piece inside the entire variable.
+ void AddExpression(DIExpression::expr_op_iterator I,
+ DIExpression::expr_op_iterator E,
+ unsigned PieceOffsetInBits = 0);
+};
+
+/// DwarfExpression implementation for .debug_loc entries.
+class DebugLocDwarfExpression : public DwarfExpression {
+ ByteStreamer &BS;
+
+public:
+ DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
+ : DwarfExpression(DwarfVersion), BS(BS) {}
+
+ void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
+};
+
+/// DwarfExpression implementation for singular DW_AT_location.
+class DIEDwarfExpression : public DwarfExpression {
+ const AsmPrinter &AP;
+ DwarfUnit &DU;
+ DIELoc &DIE;
+
+public:
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
+ void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
new file mode 100644
index 000000000000..e9fe98ab3cf9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -0,0 +1,180 @@
+//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfFile.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "DwarfUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+ : Asm(AP), StrPool(DA, *Asm, Pref) {}
+
+DwarfFile::~DwarfFile() {
+ for (DIEAbbrev *Abbrev : Abbreviations)
+ Abbrev->~DIEAbbrev();
+}
+
+// Define a unique number for the abbreviation.
+//
+DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) {
+ FoldingSetNodeID ID;
+ DIEAbbrev Abbrev = Die.generateAbbrev();
+ Abbrev.Profile(ID);
+
+ void *InsertPos;
+ if (DIEAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Die.setAbbrevNumber(Existing->getNumber());
+ return *Existing;
+ }
+
+ // Move the abbreviation to the heap and assign a number.
+ DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev));
+ Abbreviations.push_back(New);
+ New->setNumber(Abbreviations.size());
+ Die.setAbbrevNumber(Abbreviations.size());
+
+ // Store it for lookup.
+ AbbreviationsSet.InsertNode(New, InsertPos);
+ return *New;
+}
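A minimal sketch of the uniquing scheme, using a plain hash map keyed by a precomputed profile string in place of the FoldingSet (names are hypothetical).

#include <string>
#include <unordered_map>

// Identical profiles share one abbreviation number; a new profile gets the
// next index, matching the FoldingSet-based logic above.
unsigned assignNumber(std::unordered_map<std::string, unsigned> &Table,
                      const std::string &Profile) {
  auto It = Table.find(Profile);
  if (It != Table.end())
    return It->second; // reuse the existing number
  unsigned Number = static_cast<unsigned>(Table.size()) + 1;
  Table.emplace(Profile, Number);
  return Number;
}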
+
+void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) {
+ CUs.push_back(std::move(U));
+}
+
+// Emit the various dwarf units to the unit section USection with
+// the abbreviations going into ASection.
+void DwarfFile::emitUnits(bool UseOffsets) {
+ for (const auto &TheU : CUs)
+ emitUnit(TheU.get(), UseOffsets);
+}
+
+void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
+ DIE &Die = TheU->getUnitDie();
+ MCSection *USection = TheU->getSection();
+ Asm->OutStreamer->SwitchSection(USection);
+
+ TheU->emitHeader(UseOffsets);
+
+ Asm->emitDwarfDIE(Die);
+}
+
+// Compute the size and offset for each DIE.
+void DwarfFile::computeSizeAndOffsets() {
+ // Offset from the first CU in the debug info section is 0 initially.
+ unsigned SecOffset = 0;
+
+ // Iterate over each compile unit and set the size and offsets for each
+ // DIE within each compile unit. All offsets are CU relative.
+ for (const auto &TheU : CUs) {
+ TheU->setDebugInfoOffset(SecOffset);
+ SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
+ }
+}
+
+unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
+ // CU-relative offset is reset to 0 here.
+ unsigned Offset = sizeof(int32_t) + // Length of Unit Info
+ TheU->getHeaderSize(); // Unit-specific headers
+
+ // The return value here is CU-relative, after laying out
+ // all of the CU DIE.
+ return computeSizeAndOffset(TheU->getUnitDie(), Offset);
+}
+
+// Compute the size and offset of a DIE. The offset is relative to start of the
+// CU. It returns the offset after laying out the DIE.
+unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
+ // Record the abbreviation.
+ const DIEAbbrev &Abbrev = assignAbbrevNumber(Die);
+
+ // Set DIE offset
+ Die.setOffset(Offset);
+
+ // Start the size with the size of abbreviation code.
+ Offset += getULEB128Size(Die.getAbbrevNumber());
+
+ // Size the DIE attribute values.
+ for (const auto &V : Die.values())
+ // Size attribute value.
+ Offset += V.SizeOf(Asm);
+
+ // Size the DIE children if any.
+ if (Die.hasChildren()) {
+ (void)Abbrev;
+ assert(Abbrev.hasChildren() && "Children flag not set");
+
+ for (auto &Child : Die.children())
+ Offset = computeSizeAndOffset(Child, Offset);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die.setSize(Offset - Die.getOffset());
+ return Offset;
+}
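The recursion above is a pre-order layout over the DIE tree. A standalone sketch under that simplification (plain structs; a fixed attribute byte count stands in for summing SizeOf over the values):

#include <cstdint>
#include <vector>

struct Node {
  unsigned AbbrevCode = 1; // stands in for the DIE's abbreviation number
  unsigned AttrBytes = 0;  // total size of the attribute values
  std::vector<Node> Children;
  unsigned Offset = 0, Size = 0;
};

// Number of bytes needed to encode V as ULEB128.
unsigned ulebSize(uint64_t V) {
  unsigned N = 0;
  do { ++N; V >>= 7; } while (V);
  return N;
}

// Pre-order layout: a node costs its ULEB128 abbreviation code plus its
// attribute bytes, children follow immediately, and a one-byte
// end-of-children marker closes any non-empty child list.
unsigned layout(Node &N, unsigned Offset) {
  N.Offset = Offset;
  Offset += ulebSize(N.AbbrevCode) + N.AttrBytes;
  if (!N.Children.empty()) {
    for (Node &C : N.Children)
      Offset = layout(C, Offset);
    Offset += 1; // end-of-children marker
  }
  N.Size = Offset - N.Offset;
  return Offset;
}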
+
+void DwarfFile::emitAbbrevs(MCSection *Section) {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer->SwitchSection(Section);
+ Asm->emitDwarfAbbrevs(Abbreviations);
+ }
+}
+
+// Emit strings into a string section.
+void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) {
+ StrPool.emit(*Asm, StrSection, OffsetSection);
+}
+
+bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
+ const DILocalVariable *DV = Var->getVariable();
+ // Variables with positive arg numbers are parameters.
+ if (unsigned ArgNum = DV->getArg()) {
+ // Keep all parameters in order at the start of the variable list to ensure
+ // function types are correct (no out-of-order parameters).
+ //
+ // This could be improved by only doing it for optimized builds (unoptimized
+ // builds have the right order to begin with), searching from the back (this
+ // would catch the unoptimized case quickly), or doing a binary search
+ // rather than linear search.
+ auto I = Vars.begin();
+ while (I != Vars.end()) {
+ unsigned CurNum = (*I)->getVariable()->getArg();
+ // A local (non-parameter) variable has been found, insert immediately
+ // before it.
+ if (CurNum == 0)
+ break;
+ // A later indexed parameter has been found, insert immediately before it.
+ if (CurNum > ArgNum)
+ break;
+ if (CurNum == ArgNum) {
+ (*I)->addMMIEntry(*Var);
+ return false;
+ }
+ ++I;
+ }
+ Vars.insert(I, Var);
+ return true;
+ }
+
+ Vars.push_back(Var);
+ return true;
+}
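+
+// Illustrative ordering (hypothetical scope): if parameters with argument
+// numbers 2 and 1 and then a local variable (argument number 0) are added in
+// that order, the list ends up as [arg 1, arg 2, local] - parameters first,
+// in argument order, with locals appended afterwards. Adding a variable for
+// argument 2 a second time merges it into the existing entry via
+// addMMIEntry() and returns false.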
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
new file mode 100644
index 000000000000..b73d89b0e499
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -0,0 +1,128 @@
+//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+
+#include "AddressPool.h"
+#include "DwarfStringPool.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Allocator.h"
+#include <memory>
+#include <vector>
+
+namespace llvm {
+class AsmPrinter;
+class DbgVariable;
+class DwarfCompileUnit;
+class DwarfUnit;
+class DIEAbbrev;
+class MCSymbol;
+class DIE;
+class LexicalScope;
+class StringRef;
+class DwarfDebug;
+class MCSection;
+class MDNode;
+class DwarfFile {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ BumpPtrAllocator AbbrevAllocator;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ // A pointer to all units in the section.
+ SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs;
+
+ DwarfStringPool StrPool;
+
+ // Collection of dbg variables of a scope.
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables;
+
+ // Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+ /// Maps type-system MDNodes to their corresponding DIEs. These DIEs can be
+ /// shared across CUs; that is why the map is kept here instead of in
+ /// DwarfCompileUnit.
+ DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap;
+
+public:
+ DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+
+ ~DwarfFile();
+
+ const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
+ return CUs;
+ }
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Compute the size and offset of all the DIEs in the given unit.
+ /// \returns The size of the root DIE.
+ unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU);
+
+ /// Define a unique number for the abbreviation.
+ ///
+ /// Compute the abbreviation for \c Die, look up its unique number, and
+ /// return a reference to it in the uniquing table.
+ DIEAbbrev &assignAbbrevNumber(DIE &Die);
+
+ /// \brief Add a unit to the list of CUs.
+ void addUnit(std::unique_ptr<DwarfCompileUnit> U);
+
+ /// \brief Emit all of the units to their assigned sections.
+ void emitUnits(bool UseOffsets);
+
+ /// \brief Emit the given unit to its section.
+ void emitUnit(DwarfUnit *U, bool UseOffsets);
+
+ /// \brief Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(MCSection *);
+
+ /// \brief Emit all of the strings to the section given.
+ void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr);
+
+ /// \brief Returns the string pool.
+ DwarfStringPool &getStringPool() { return StrPool; }
+
+ /// \returns false if the variable was merged with a previous one.
+ bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
+
+ DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() {
+ return ScopeVariables;
+ }
+
+ DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ return AbstractSPDies;
+ }
+
+ void insertDIE(const MDNode *TypeMD, DIE *Die) {
+ DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
+ }
+ DIE *getDIE(const MDNode *TypeMD) {
+ return DITypeNodeToDieMap.lookup(TypeMD);
+ }
+};
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
new file mode 100644
index 000000000000..2066f745e318
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -0,0 +1,74 @@
+//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfStringPool.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm,
+ StringRef Prefix)
+ : Pool(A), Prefix(Prefix),
+ ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {}
+
+DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
+ StringRef Str) {
+ auto I = Pool.insert(std::make_pair(Str, EntryTy()));
+ if (I.second) {
+ auto &Entry = I.first->second;
+ Entry.Index = Pool.size() - 1;
+ Entry.Offset = NumBytes;
+ Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
+
+ NumBytes += Str.size() + 1;
+ assert(NumBytes > Entry.Offset && "Unexpected overflow");
+ }
+ return EntryRef(*I.first);
+}
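+
+// Minimal usage sketch (assuming a BumpPtrAllocator Alloc, an AsmPrinter AP
+// and a hypothetical "str" label prefix); repeated lookups of the same string
+// return the same entry, keeping its first-assigned index, offset and label:
+//
+//   DwarfStringPool Pool(Alloc, AP, "str");
+//   auto Main = Pool.getEntry(AP, "main"); // index 0, offset 0
+//   auto Dup  = Pool.getEntry(AP, "main"); // same entry as Main
+//   auto Argv = Pool.getEntry(AP, "argv"); // index 1, offset 5 ("main" + NUL)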
+
+void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
+ MCSection *OffsetSection) {
+ if (Pool.empty())
+ return;
+
+ // Start the dwarf str section.
+ Asm.OutStreamer->SwitchSection(StrSection);
+
+ // Gather all of the string pool entries into an array indexed by their ID so
+ // that they are emitted in insertion order.
+ SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size());
+
+ for (const auto &E : Pool)
+ Entries[E.getValue().Index] = &E;
+
+ for (const auto &Entry : Entries) {
+ assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) &&
+ "Mismatch between setting and entry");
+
+ // Emit a label for reference from debug information entries.
+ if (ShouldCreateSymbols)
+ Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol);
+
+ // Emit the string itself with a terminating null byte.
+ Asm.OutStreamer->AddComment("string offset=" +
+ Twine(Entry->getValue().Offset));
+ Asm.OutStreamer->EmitBytes(
+ StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1));
+ }
+
+ // If we've got an offset section go ahead and emit that now as well.
+ if (OffsetSection) {
+ Asm.OutStreamer->SwitchSection(OffsetSection);
+ unsigned size = 4; // FIXME: DWARF64 is 8.
+ for (const auto &Entry : Entries)
+ Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
+ }
+}
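+
+// Resulting layout (sketch): the string section holds the NUL-terminated
+// strings back to back in index order, each preceded by its label when
+// ShouldCreateSymbols is set, while the optional offset section holds one
+// 4-byte entry per string giving its byte offset into the string section
+// (8-byte entries would be needed for DWARF64, per the FIXME above).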
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
new file mode 100644
index 000000000000..93a168485a54
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -0,0 +1,49 @@
+//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/Allocator.h"
+#include <utility>
+
+namespace llvm {
+
+class AsmPrinter;
+class MCSymbol;
+class MCSection;
+class StringRef;
+
+// A pool of the strings used by this unit, together with the symbols and
+// offsets through which they are referenced, either directly or via an index.
+class DwarfStringPool {
+ typedef DwarfStringPoolEntry EntryTy;
+ StringMap<EntryTy, BumpPtrAllocator &> Pool;
+ StringRef Prefix;
+ unsigned NumBytes = 0;
+ bool ShouldCreateSymbols;
+
+public:
+ typedef DwarfStringPoolEntryRef EntryRef;
+
+ DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
+
+ void emit(AsmPrinter &Asm, MCSection *StrSection,
+ MCSection *OffsetSection = nullptr);
+
+ bool empty() const { return Pool.empty(); }
+
+ /// Get a reference to an entry in the string pool.
+ EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
+};
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
new file mode 100644
index 000000000000..4100d728a53b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -0,0 +1,1541 @@
+//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfUnit.h"
+#include "DwarfAccelTable.h"
+#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
+#include "DwarfExpression.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+static cl::opt<bool>
+GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
+ cl::desc("Generate DWARF4 type units."),
+ cl::init(false));
+
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+ DIELoc &DIE)
+ : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU),
+ DIE(DIE) {}
+
+void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
+ DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+}
+void DIEDwarfExpression::EmitSigned(int64_t Value) {
+ DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+}
+void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
+ DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+}
+bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) {
+ return MachineReg == TRI.getFrameRegister(*AP.MF);
+}
+
+DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
+ AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
+ : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A),
+ DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
+ assert(UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_type_unit);
+}
+
+DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU,
+ MCDwarfDwoLineTable *SplitLineTable)
+ : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
+ SplitLineTable(SplitLineTable) {
+ if (SplitLineTable)
+ addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
+}
+
+DwarfUnit::~DwarfUnit() {
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
+ for (unsigned j = 0, M = DIELocs.size(); j < M; ++j)
+ DIELocs[j]->~DIELoc();
+}
+
+int64_t DwarfUnit::getDefaultLowerBound() const {
+ switch (getLanguage()) {
+ default:
+ break;
+
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return 0;
+
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ return 1;
+
+ // The languages below have valid values only if the DWARF version >= 4.
+ case dwarf::DW_LANG_Java:
+ case dwarf::DW_LANG_Python:
+ case dwarf::DW_LANG_UPC:
+ case dwarf::DW_LANG_D:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Ada83:
+ case dwarf::DW_LANG_Ada95:
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ case dwarf::DW_LANG_Modula2:
+ case dwarf::DW_LANG_Pascal83:
+ case dwarf::DW_LANG_PLI:
+ if (dwarf::DWARF_VERSION >= 4)
+ return 1;
+ break;
+
+ // The languages below have valid values only if the DWARF version >= 5.
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_Rust:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_Swift:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ if (dwarf::DWARF_VERSION >= 5)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Modula3:
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ if (dwarf::DWARF_VERSION >= 5)
+ return 1;
+ break;
+ }
+
+ return -1;
+}
+
+/// Check whether the DIE for this MDNode can be shared across CUs.
+static bool isShareableAcrossCUs(const DINode *D) {
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
+ // Combining type units and cross-CU DIE sharing is lower value (since
+ // cross-CU DIE sharing is used in LTO and removes type redundancy at that
+ // level already) but may be implementable for some value in projects
+ // building multiple independent libraries with LTO and then linking those
+ // together.
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !GenerateDwarfTypeUnits;
+}
+
+DIE *DwarfUnit::getDIE(const DINode *D) const {
+ if (isShareableAcrossCUs(D))
+ return DU->getDIE(D);
+ return MDNodeToDieMap.lookup(D);
+}
+
+void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
+ if (isShareableAcrossCUs(Desc)) {
+ DU->insertDIE(Desc, D);
+ return;
+ }
+ MDNodeToDieMap.insert(std::make_pair(Desc, D));
+}
+
+void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
+ if (DD->getDwarfVersion() >= 4)
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present,
+ DIEInteger(1));
+ else
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag,
+ DIEInteger(1));
+}
+
+void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, uint64_t Integer) {
+ if (!Form)
+ Form = DIEInteger::BestForm(false, Integer);
+ Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
+}
+
+void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
+ uint64_t Integer) {
+ addUInt(Block, (dwarf::Attribute)0, Form, Integer);
+}
+
+void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, int64_t Integer) {
+ if (!Form)
+ Form = DIEInteger::BestForm(true, Integer);
+ Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
+}
+
+void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
+ int64_t Integer) {
+ addSInt(Die, (dwarf::Attribute)0, Form, Integer);
+}
+
+void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
+ StringRef String) {
+ Die.addValue(DIEValueAllocator, Attribute,
+ isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
+ DIEString(DU->getStringPool().getEntry(*Asm, String)));
+}
+
+DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
+ dwarf::Attribute Attribute,
+ dwarf::Form Form,
+ const MCSymbol *Label) {
+ return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
+}
+
+void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
+ addLabel(Die, (dwarf::Attribute)0, Form, Label);
+}
+
+void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
+ uint64_t Integer) {
+ if (DD->getDwarfVersion() >= 4)
+ addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer);
+ else
+ addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
+}
+
+unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ return SplitLineTable ? SplitLineTable->getFile(DirName, FileName)
+ : getCU().getOrCreateSourceID(FileName, DirName);
+}
+
+void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
+ if (!DD->useSplitDwarf()) {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Die, dwarf::DW_FORM_udata, Sym);
+ } else {
+ addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
+ addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
+ DD->getAddressPool().getIndex(Sym));
+ }
+}
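+
+// Sketch of the two encodings above: without split DWARF the location
+// expression gains "DW_OP_addr <address of Sym>", while under Fission it
+// gains "DW_OP_GNU_addr_index <ULEB128 index>", where the index is Sym's slot
+// in the address pool that DwarfDebug later emits as the .debug_addr table.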
+
+void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
+ const MCSymbol *Hi, const MCSymbol *Lo) {
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4,
+ new (DIEValueAllocator) DIEDelta(Hi, Lo));
+}
+
+void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
+ addDIEEntry(Die, Attribute, DIEEntry(Entry));
+}
+
+void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
+ // Flag the type unit reference as a declaration so that if it contains
+ // members (implicit special members, static data member definitions, member
+ // declarations for definitions in this CU, etc) consumers don't get confused
+ // and think this is a full definition.
+ addFlag(Die, dwarf::DW_AT_declaration);
+
+ Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature,
+ dwarf::DW_FORM_ref_sig8, DIEInteger(Signature));
+}
+
+void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+ StringRef Identifier) {
+ uint64_t Signature = DD->makeTypeSignature(Identifier);
+ Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
+ DIEInteger(Signature));
+}
+
+void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
+ DIEEntry Entry) {
+ const DIE *DieCU = Die.getUnitOrNull();
+ const DIE *EntryCU = Entry.getEntry().getUnitOrNull();
+ if (!DieCU)
+ // We assume that Die belongs to this CU, if it is not linked to any CU yet.
+ DieCU = &getUnitDie();
+ if (!EntryCU)
+ EntryCU = &getUnitDie();
+ Die.addValue(DIEValueAllocator, Attribute,
+ EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ Entry);
+}
+
+DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
+ DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
+ if (N)
+ insertDIE(N, &Die);
+ return Die;
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
+ Loc->ComputeSize(Asm);
+ DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
+ Die.addValue(DIEValueAllocator, Attribute,
+ Loc->BestForm(DD->getDwarfVersion()), Loc);
+}
+
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
+ DIEBlock *Block) {
+ Block->ComputeSize(Asm);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
+ StringRef Directory) {
+ if (Line == 0)
+ return;
+
+ unsigned FileID = getOrCreateSourceID(File, Directory);
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
+ assert(V);
+
+ addSourceLine(Die, V->getLine(), V->getScope()->getFilename(),
+ V->getScope()->getDirectory());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
+ assert(G);
+
+ addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
+ assert(SP);
+
+ addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
+ assert(Ty);
+
+ addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
+ assert(Ty);
+
+ addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
+}
+
+void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
+ addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
+}
+
+bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ unsigned SizeInBits, unsigned OffsetInBits) {
+ DIEDwarfExpression Expr(*Asm, *this, TheDie);
+ Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg,
+ SizeInBits, OffsetInBits);
+ return true;
+}
+
+bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
+ int64_t Offset) {
+ DIEDwarfExpression Expr(*Asm, *this, TheDie);
+ return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Reg, Offset);
+}
+
+/* Byref variables, in Blocks, are declared by the programmer as "SomeType
+ VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
+ gives the variable VarName either the struct, or a pointer to the struct, as
+ its type. This is necessary for various behind-the-scenes things the
+ compiler needs to do with by-reference variables in Blocks.
+
+ However, as far as the original *programmer* is concerned, the variable
+ should still have type 'SomeType', as originally declared.
+
+ The function getBlockByrefType dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable, which is then assigned to
+ the variable's Debug Information Entry as its real type. So far, so good.
+ However now the debugger will expect the variable VarName to have the type
+ SomeType. So we need the location attribute for the variable to be an
+ expression that explains to the debugger how to navigate through the
+ pointers and struct to find the actual variable of type SomeType.
+
+ The following function does just that. We start by getting
+ the "normal" location for the variable. This will be the location
+ of either the struct __Block_byref_x_VarName or the pointer to the
+ struct __Block_byref_x_VarName.
+
+ The struct will look something like:
+
+ struct __Block_byref_x_VarName {
+ ... <various fields>
+ struct __Block_byref_x_VarName *forwarding;
+ ... <various other fields>
+ SomeType VarName;
+ ... <maybe more fields>
+ };
+
+ If we are given the struct directly (as our starting point) we
+ need to tell the debugger to:
+
+ 1). Add the offset of the forwarding field.
+
+ 2). Follow that pointer to get the real __Block_byref_x_VarName
+ struct to use (the real one may have been copied onto the heap).
+
+ 3). Add the offset for the field VarName, to find the actual variable.
+
+ If we started with a pointer to the struct, then we need to
+ dereference that pointer first, before the other steps.
+ Translating this into DWARF ops, we will need to append the following
+ to the current location description for the variable:
+
+ DW_OP_deref -- optional, if we start with a pointer
+ DW_OP_plus_uconst <forward_fld_offset>
+ DW_OP_deref
+ DW_OP_plus_uconst <varName_fld_offset>
+
+ That is what this function does. */
+
+void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location) {
+ const DIType *Ty = DV.getType();
+ const DIType *TmpTy = Ty;
+ uint16_t Tag = Ty->getTag();
+ bool isPointer = false;
+
+ StringRef varName = DV.getName();
+
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ auto *DTy = cast<DIDerivedType>(Ty);
+ TmpTy = resolve(DTy->getBaseType());
+ isPointer = true;
+ }
+
+ // Find the __forwarding field and the variable field in the __Block_byref
+ // struct.
+ DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
+ const DIDerivedType *varField = nullptr;
+ const DIDerivedType *forwardingField = nullptr;
+
+ for (unsigned i = 0, N = Fields.size(); i < N; ++i) {
+ auto *DT = cast<DIDerivedType>(Fields[i]);
+ StringRef fieldName = DT->getName();
+ if (fieldName == "__forwarding")
+ forwardingField = DT;
+ else if (fieldName == varName)
+ varField = DT;
+ }
+
+ // Get the offsets for the forwarding field and the variable field.
+ unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3;
+ unsigned varFieldOffset = varField->getOffsetInBits() >> 2;
+
+ // Decode the original location, and use that as the start of the byref
+ // variable's location.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+
+ bool validReg;
+ if (Location.isReg())
+ validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ else
+ validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+ if (!validReg)
+ return;
+
+ // If we started with a pointer to the __Block_byref... struct, then
+ // the first thing we need to do is dereference the pointer (DW_OP_deref).
+ if (isPointer)
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Next add the offset for the '__forwarding' field:
+ // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+ // adding the offset if it's 0.
+ if (forwardingFieldOffset > 0) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ }
+
+ // Now dereference the __forwarding field to get to the real __Block_byref
+ // struct: DW_OP_deref.
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+ // Now that we've got the real __Block_byref... struct, add the offset
+ // for the variable's field to get to the location of the actual variable:
+ // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+ if (varFieldOffset > 0) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset);
+ }
+
+ // Now attach the location information to the DIE.
+ addBlock(Die, Attribute, Loc);
+}
+
+/// Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+ return false;
+
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
+ }
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ // Encode pointer constants as unsigned bytes. This is used at least for
+ // null pointer constant emission.
+ // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
+ // here, but accept them for now due to a bug in SROA producing bogus
+ // dbg.values.
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return true;
+ assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
+ T == dwarf::DW_TAG_volatile_type ||
+ T == dwarf::DW_TAG_restrict_type);
+ DITypeRef Deriv = DTy->getBaseType();
+ assert(Deriv && "Expected valid base type");
+ return isUnsignedDIType(DD, DD->resolve(Deriv));
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ unsigned Encoding = BTy->getEncoding();
+ assert((Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_signed ||
+ Encoding == dwarf::DW_ATE_signed_char ||
+ Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
+ Encoding == dwarf::DW_ATE_boolean ||
+ (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty->getName() == "decltype(nullptr)")) &&
+ "Unsupported encoding");
+ return Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty->getTag() == dwarf::DW_TAG_unspecified_type;
+}
+
+void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
+ assert(MO.isFPImm() && "Invalid machine operand!");
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
+ APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+ // Get the raw data form of the floating point.
+ const APInt FltVal = FPImm.bitcastToAPInt();
+ const char *FltPtr = (const char *)FltVal.getRawData();
+
+ int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, Block);
+}
+
+void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
+ // Pass this down to addConstantValue as an unsigned bag of bits.
+ addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
+ const DIType *Ty) {
+ addConstantValue(Die, CI->getValue(), Ty);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
+ const DIType *Ty) {
+ assert(MO.isImm() && "Invalid machine operand!");
+
+ addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
+ // FIXME: This is a bit conservative/simple - it always emits negative values
+ // sign-extended to 64 bits rather than minimizing the number of bytes.
+ addUInt(Die, dwarf::DW_AT_const_value,
+ Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val);
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
+ addConstantValue(Die, Val, isUnsignedDIType(DD, Ty));
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
+ unsigned CIBitWidth = Val.getBitWidth();
+ if (CIBitWidth <= 64) {
+ addConstantValue(Die, Unsigned,
+ Unsigned ? Val.getZExtValue() : Val.getSExtValue());
+ return;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
+
+ // Get the raw data form of the large APInt.
+ const uint64_t *Ptr64 = Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getDataLayout().isLittleEndian();
+
+ // Output the constant to DWARF one byte at a time.
+ for (int i = 0; i < NumBytes; i++) {
+ uint8_t c;
+ if (LittleEndian)
+ c = Ptr64[i / 8] >> (8 * (i & 7));
+ else
+ c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
+ addUInt(*Block, dwarf::DW_FORM_data1, c);
+ }
+
+ addBlock(Die, dwarf::DW_AT_const_value, Block);
+}
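+
+// Worked example (hypothetical value): for a 128-bit APInt whose low 64-bit
+// word is 0x0807060504030201 and whose high word is 0x100F0E0D0C0B0A09, a
+// little-endian target emits the DW_FORM_data1 bytes 01 02 ... 10 in that
+// order, while a big-endian target emits them reversed, 10 0F ... 01.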
+
+void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
+ if (!LinkageName.empty())
+ addString(Die,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::getRealLinkageName(LinkageName));
+}
+
+void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
+ // Add template parameters.
+ for (const auto *Element : TParams) {
+ if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element))
+ constructTemplateTypeParameterDIE(Buffer, TTP);
+ else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element))
+ constructTemplateValueParameterDIE(Buffer, TVP);
+ }
+}
+
+DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
+ if (!Context || isa<DIFile>(Context))
+ return &getUnitDie();
+ if (auto *T = dyn_cast<DIType>(Context))
+ return getOrCreateTypeDIE(T);
+ if (auto *NS = dyn_cast<DINamespace>(Context))
+ return getOrCreateNameSpace(NS);
+ if (auto *SP = dyn_cast<DISubprogram>(Context))
+ return getOrCreateSubprogramDIE(SP);
+ if (auto *M = dyn_cast<DIModule>(Context))
+ return getOrCreateModule(M);
+ return getDIE(Context);
+}
+
+DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+ auto *Context = resolve(Ty->getScope());
+ DIE *ContextDIE = getOrCreateContextDIE(Context);
+
+ if (DIE *TyDIE = getDIE(Ty))
+ return TyDIE;
+
+ // Create new type.
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
+
+ constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
+
+ if (!Ty->isExternalTypeRef())
+ updateAcceleratorTables(Context, Ty, TyDIE);
+ return &TyDIE;
+}
+
+DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+ if (!TyNode)
+ return nullptr;
+
+ auto *Ty = cast<DIType>(TyNode);
+
+ // DW_TAG_restrict_type is not supported in DWARF2
+ if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
+ return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ auto *Context = resolve(Ty->getScope());
+ DIE *ContextDIE = getOrCreateContextDIE(Context);
+ assert(ContextDIE);
+
+ if (DIE *TyDIE = getDIE(Ty))
+ return TyDIE;
+
+ // Create new type.
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
+
+ updateAcceleratorTables(Context, Ty, TyDIE);
+
+ if (auto *BT = dyn_cast<DIBasicType>(Ty))
+ constructTypeDIE(TyDIE, BT);
+ else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+ constructTypeDIE(TyDIE, STy);
+ else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (GenerateDwarfTypeUnits && !Ty->isForwardDecl())
+ if (MDString *TypeId = CTy->getRawIdentifier()) {
+ DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
+ // Skip updating the accelerator tables since this is not the full type.
+ return &TyDIE;
+ }
+ constructTypeDIE(TyDIE, CTy);
+ } else {
+ constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
+ }
+
+ return &TyDIE;
+}
+
+void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
+ const DIType *Ty, const DIE &TyDIE) {
+ if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
+ bool IsImplementation = false;
+ if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
+ // A runtime language of 0 actually means C/C++, while any non-zero value
+ // indicates some version of Objective-C/C++.
+ IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
+ DD->addAccelType(Ty->getName(), TyDIE, Flags);
+
+ if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
+ isa<DINamespace>(Context))
+ addGlobalType(Ty, TyDIE, Context);
+ }
+}
+
+void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
+ dwarf::Attribute Attribute) {
+ assert(Ty && "Trying to add a type that doesn't exist?");
+ addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
+}
+
+std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
+ if (!Context)
+ return "";
+
+ // FIXME: Decide whether to implement this for non-C++ languages.
+ if (getLanguage() != dwarf::DW_LANG_C_plus_plus)
+ return "";
+
+ std::string CS;
+ SmallVector<const DIScope *, 1> Parents;
+ while (!isa<DICompileUnit>(Context)) {
+ Parents.push_back(Context);
+ if (Context->getScope())
+ Context = resolve(Context->getScope());
+ else
+ // Structure, etc types will have a NULL context if they're at the top
+ // level.
+ break;
+ }
+
+ // Reverse iterate over our list to go from the outermost construct to the
+ // innermost.
+ for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
+ StringRef Name = Ctx->getName();
+ if (Name.empty() && isa<DINamespace>(Ctx))
+ Name = "(anonymous namespace)";
+ if (!Name.empty()) {
+ CS += Name;
+ CS += "::";
+ }
+ }
+ return CS;
+}
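+
+// Example result (hypothetical C++ input): for a type nested as
+// "namespace A { struct B { struct C; }; }", the context string returned for
+// C is "A::B::"; an unnamed enclosing namespace contributes
+// "(anonymous namespace)::" instead, and non-C++ CUs always get "".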
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
+ // Get core information.
+ StringRef Name = BTy->getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ // An unspecified type only has a name attribute.
+ if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
+ return;
+
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy->getEncoding());
+
+ uint64_t Size = BTy->getSizeInBits() >> 3;
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
+ // Get core information.
+ StringRef Name = DTy->getName();
+ uint64_t Size = DTy->getSizeInBits() >> 3;
+ uint16_t Tag = Buffer.getTag();
+
+ // Map to main type, void will not have a type.
+ const DIType *FromTy = resolve(DTy->getBaseType());
+ if (FromTy)
+ addType(Buffer, FromTy);
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ // Add size if non-zero (derived types might be zero-sized.)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type
+ && Tag != dwarf::DW_TAG_ptr_to_member_type
+ && Tag != dwarf::DW_TAG_reference_type
+ && Tag != dwarf::DW_TAG_rvalue_reference_type)
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+
+ if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+ addDIEEntry(
+ Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType())));
+ // Add source line info if available and TyDesc is not a forward declaration.
+ if (!DTy->isForwardDecl())
+ addSourceLine(Buffer, DTy);
+}
+
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
+ for (unsigned i = 1, N = Args.size(); i < N; ++i) {
+ const DIType *Ty = resolve(Args[i]);
+ if (!Ty) {
+ assert(i == N-1 && "Unspecified parameter must be the last argument");
+ createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
+ } else {
+ DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
+ addType(Arg, Ty);
+ if (Ty->isArtificial())
+ addFlag(Arg, dwarf::DW_AT_artificial);
+ }
+ }
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
+ // Add return type. A void return won't have a type.
+ auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
+ if (Elements.size())
+ if (auto RTy = resolve(Elements[0]))
+ addType(Buffer, RTy);
+
+ bool isPrototyped = true;
+ if (Elements.size() == 2 && !Elements[1])
+ isPrototyped = false;
+
+ constructSubprogramArguments(Buffer, Elements);
+
+ // Add prototype flag if we're dealing with a C language and the function has
+ // been prototyped.
+ uint16_t Language = getLanguage();
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(Buffer, dwarf::DW_AT_prototyped);
+
+ // Add a DW_AT_calling_convention if this has an explicit convention.
+ if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal)
+ addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1,
+ CTy->getCC());
+
+ if (CTy->isLValueReference())
+ addFlag(Buffer, dwarf::DW_AT_reference);
+
+ if (CTy->isRValueReference())
+ addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
+}
+
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ if (CTy->isExternalTypeRef()) {
+ StringRef Identifier = CTy->getIdentifier();
+ assert(!Identifier.empty() && "external type ref without identifier");
+ addFlag(Buffer, dwarf::DW_AT_declaration);
+ return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
+ }
+
+ // Add name if not anonymous or intermediate type.
+ StringRef Name = CTy->getName();
+
+ uint64_t Size = CTy->getSizeInBits() >> 3;
+ uint16_t Tag = Buffer.getTag();
+
+ switch (Tag) {
+ case dwarf::DW_TAG_array_type:
+ constructArrayTypeDIE(Buffer, CTy);
+ break;
+ case dwarf::DW_TAG_enumeration_type:
+ constructEnumTypeDIE(Buffer, CTy);
+ break;
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_class_type: {
+ // Add elements to structure type.
+ DINodeArray Elements = CTy->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ if (auto *SP = dyn_cast<DISubprogram>(Element))
+ getOrCreateSubprogramDIE(SP);
+ else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
+ if (DDTy->getTag() == dwarf::DW_TAG_friend) {
+ DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
+ addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
+ } else if (DDTy->isStaticMember()) {
+ getOrCreateStaticMemberDIE(DDTy);
+ } else {
+ constructMemberDIE(Buffer, DDTy);
+ }
+ } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) {
+ DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer);
+ StringRef PropertyName = Property->getName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ if (Property->getType())
+ addType(ElemDie, resolve(Property->getType()));
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property->getGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property->getSetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ if (unsigned PropertyAttributes = Property->getAttributes())
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
+ PropertyAttributes);
+ }
+ }
+
+ if (CTy->isAppleBlockExtension())
+ addFlag(Buffer, dwarf::DW_AT_APPLE_block);
+
+ // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
+ // inside C++ composite types to point to the base class with the vtable.
+ if (auto *ContainingType =
+ dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder())))
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(ContainingType));
+
+ if (CTy->isObjcClassComplete())
+ addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
+
+ // Add template parameters to class, structure, or union types.
+ // FIXME: The support isn't in the metadata for this yet.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ addTemplateParams(Buffer, CTy->getTemplateParams());
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ if (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type) {
+ // Add size if non-zero (derived types might be zero-sized.)
+ // TODO: Do we care about size for enum forward declarations?
+ if (Size)
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ else if (!CTy->isForwardDecl())
+ // Add zero size if it is not a forward declaration.
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
+
+ // If we're a forward decl, say so.
+ if (CTy->isForwardDecl())
+ addFlag(Buffer, dwarf::DW_AT_declaration);
+
+ // Add source line info if available.
+ if (!CTy->isForwardDecl())
+ addSourceLine(Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy->getRuntimeLang();
+ if (RLang)
+ addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
+ RLang);
+ }
+}
+
+void DwarfUnit::constructTemplateTypeParameterDIE(
+ DIE &Buffer, const DITemplateTypeParameter *TP) {
+ DIE &ParamDIE =
+ createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
+ // Add the type if it exists, it could be void and therefore no type.
+ if (TP->getType())
+ addType(ParamDIE, resolve(TP->getType()));
+ if (!TP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
+}
+
+void DwarfUnit::constructTemplateValueParameterDIE(
+ DIE &Buffer, const DITemplateValueParameter *VP) {
+ DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer);
+
+ // Add the type if there is one, template template and template parameter
+ // packs will not have a type.
+ if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
+ addType(ParamDIE, resolve(VP->getType()));
+ if (!VP->getName().empty())
+ addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
+ if (Metadata *Val = VP->getValue()) {
+ if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
+ addConstantValue(ParamDIE, CI, resolve(VP->getType()));
+ else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
+ // We cannot describe the location of dllimport'd entities: the
+ // computation of their address requires loads from the IAT.
+ if (!GV->hasDLLImportStorageClass()) {
+ // For non-type template parameters that refer to declarations (such as
+ // global values and functions), describe the location by address.
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ addOpAddress(*Loc, Asm->getSymbol(GV));
+ // Emit DW_OP_stack_value to use the address as the immediate value of
+ // the parameter, rather than a pointer to it.
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
+ }
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ assert(isa<MDString>(Val));
+ addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
+ cast<MDString>(Val)->getString());
+ } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ addTemplateParams(ParamDIE, cast<MDTuple>(Val));
+ }
+ }
+}
+
+DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(NS->getScope());
+
+ if (DIE *NDie = getDIE(NS))
+ return NDie;
+ DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
+
+ StringRef Name = NS->getName();
+ if (!Name.empty())
+ addString(NDie, dwarf::DW_AT_name, NS->getName());
+ else
+ Name = "(anonymous namespace)";
+ DD->addAccelNamespace(Name, NDie);
+ addGlobalName(Name, NDie, NS->getScope());
+ addSourceLine(NDie, NS);
+ return &NDie;
+}
+
+DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(M->getScope());
+
+ if (DIE *MDie = getDIE(M))
+ return MDie;
+ DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M);
+
+ if (!M->getName().empty()) {
+ addString(MDie, dwarf::DW_AT_name, M->getName());
+ addGlobalName(M->getName(), MDie, M->getScope());
+ }
+ if (!M->getConfigurationMacros().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_config_macros,
+ M->getConfigurationMacros());
+ if (!M->getIncludePath().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
+ if (!M->getISysRoot().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot());
+
+ return &MDie;
+}
+
+DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE (as is the case for member function
+ // declarations).
+ DIE *ContextDIE =
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope()));
+
+ if (DIE *SPDie = getDIE(SP))
+ return SPDie;
+
+ if (auto *SPDecl = SP->getDeclaration()) {
+ if (!Minimal) {
+ // Add subprogram definitions to the CU die directly.
+ ContextDIE = &getUnitDie();
+ // Build the decl now to ensure it precedes the definition.
+ getOrCreateSubprogramDIE(SPDecl);
+ }
+ }
+
+ // DW_TAG_inlined_subroutine may refer to this DIE.
+ DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
+
+ // Stop here and fill this in later, depending on whether or not this
+ // subprogram turns out to have inlined instances.
+ if (SP->isDefinition())
+ return &SPDie;
+
+ applySubprogramAttributes(SP, SPDie);
+ return &SPDie;
+}
+
+bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
+ DIE &SPDie) {
+ DIE *DeclDie = nullptr;
+ StringRef DeclLinkageName;
+ if (auto *SPDecl = SP->getDeclaration()) {
+ DeclDie = getDIE(SPDecl);
+ assert(DeclDie && "This DIE should've already been constructed when the "
+ "definition DIE was created in "
+ "getOrCreateSubprogramDIE");
+ DeclLinkageName = SPDecl->getLinkageName();
+ unsigned DeclID =
+ getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
+ unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
+ if (DeclID != DefID)
+ addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+
+ if (SP->getLine() != SPDecl->getLine())
+ addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
+ }
+
+ // Add function template parameters.
+ addTemplateParams(SPDie, SP->getTemplateParams());
+
+ // Add the linkage name if we have one and it isn't in the Decl.
+ StringRef LinkageName = SP->getLinkageName();
+ assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
+ LinkageName == DeclLinkageName) &&
+ "decl has a linkage name and it is different");
+ if (DeclLinkageName.empty() &&
+ // Always emit it for abstract subprograms.
+ (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP)))
+ addLinkageName(SPDie, LinkageName);
+
+ if (!DeclDie)
+ return false;
+
+ // Refer to the function declaration where all the other attributes will be
+ // found.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
+ return true;
+}
+
+void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
+ bool Minimal) {
+ if (!Minimal)
+ if (applySubprogramDefinitionAttributes(SP, SPDie))
+ return;
+
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP->getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, SP->getName());
+
+ // Skip the rest of the attributes under -gmlt to save space.
+ if (Minimal)
+ return;
+
+ addSourceLine(SPDie, SP);
+
+ // Add the prototype if we have a prototype and we have a C like
+ // language.
+ uint16_t Language = getLanguage();
+ if (SP->isPrototyped() &&
+ (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
+ addFlag(SPDie, dwarf::DW_AT_prototyped);
+
+ unsigned CC = 0;
+ DITypeRefArray Args;
+ if (const DISubroutineType *SPTy = SP->getType()) {
+ Args = SPTy->getTypeArray();
+ CC = SPTy->getCC();
+ }
+
+ // Add a DW_AT_calling_convention if this has an explicit convention.
+ if (CC && CC != dwarf::DW_CC_normal)
+ addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC);
+
+ // Add a return type. If this is a type like a C/C++ void type we don't add a
+ // return type.
+ if (Args.size())
+ if (auto Ty = resolve(Args[0]))
+ addType(SPDie, Ty);
+
+ unsigned VK = SP->getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
+ if (SP->getVirtualIndex() != -1u) {
+ DIELoc *Block = getDIELoc();
+ addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
+ }
+ ContainingTypeMap.insert(
+ std::make_pair(&SPDie, resolve(SP->getContainingType())));
+ }
+
+ if (!SP->isDefinition()) {
+ addFlag(SPDie, dwarf::DW_AT_declaration);
+
+ // Add arguments. Do not add arguments for subprogram definition. They will
+ // be handled while processing variables.
+ constructSubprogramArguments(SPDie, Args);
+ }
+
+ if (SP->isArtificial())
+ addFlag(SPDie, dwarf::DW_AT_artificial);
+
+ if (!SP->isLocalToUnit())
+ addFlag(SPDie, dwarf::DW_AT_external);
+
+ if (DD->useAppleExtensionAttributes()) {
+ if (SP->isOptimized())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
+
+ if (unsigned isa = Asm->getISAEncoding())
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
+ if (SP->isLValueReference())
+ addFlag(SPDie, dwarf::DW_AT_reference);
+
+ if (SP->isRValueReference())
+ addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
+
+ if (SP->isProtected())
+ addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (SP->isPrivate())
+ addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else if (SP->isPublic())
+ addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+
+ if (SP->isExplicit())
+ addFlag(SPDie, dwarf::DW_AT_explicit);
+}
+
+void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
+ DIE *IndexTy) {
+ DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
+
+ // The LowerBound value defines the lower bound, which is typically zero for
+ // C/C++. The Count value is the number of elements. Both values are 64 bit.
+ // If Count == -1 then the array is unbounded and we do not emit
+ // DW_AT_lower_bound and DW_AT_count attributes.
+ int64_t LowerBound = SR->getLowerBound();
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+ int64_t Count = SR->getCount();
+
+ if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
+
+ if (Count != -1)
+ // FIXME: An unbounded array should reference the expression that defines
+ // the array.
+ addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
+}
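+
+// Example (C source): "int a[5]" yields a subrange with Count == 5 and
+// LowerBound == 0; since 0 matches the C default lower bound, only
+// DW_AT_count is emitted. A declaration such as "extern int b[];" has
+// Count == -1 and gets neither attribute.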
+
+DIE *DwarfUnit::getIndexTyDie() {
+ if (IndexTyDie)
+ return IndexTyDie;
+ // Construct an integer type to use for indexes.
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+ addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype");
+ addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
+ addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ dwarf::DW_ATE_unsigned);
+ return IndexTyDie;
+}
+
+void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ if (CTy->isVector())
+ addFlag(Buffer, dwarf::DW_AT_GNU_vector);
+
+ // Emit the element type.
+ addType(Buffer, resolve(CTy->getBaseType()));
+
+ // Get an anonymous type for index type.
+ // FIXME: This type should be passed down from the front end
+ // as different languages may have different sizes for indexes.
+ DIE *IdxTy = getIndexTyDie();
+
+ // Add subranges to array type.
+ DINodeArray Elements = CTy->getElements();
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ // FIXME: Should this really be such a loose cast?
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[i]))
+ if (Element->getTag() == dwarf::DW_TAG_subrange_type)
+ constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
+ }
+}
+
+void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+ DINodeArray Elements = CTy->getElements();
+
+ // Add enumerators to enumeration type.
+ for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
+ auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]);
+ if (Enum) {
+ DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+ StringRef Name = Enum->getName();
+ addString(Enumerator, dwarf::DW_AT_name, Name);
+ int64_t Value = Enum->getValue();
+ addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ Value);
+ }
+ }
+ const DIType *DTy = resolve(CTy->getBaseType());
+ if (DTy) {
+ addType(Buffer, DTy);
+ addFlag(Buffer, dwarf::DW_AT_enum_class);
+ }
+}
+
+void DwarfUnit::constructContainingTypeDIEs() {
+ for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end();
+ CI != CE; ++CI) {
+ DIE &SPDie = *CI->first;
+ const DINode *D = CI->second;
+ if (!D)
+ continue;
+ DIE *NDie = getDIE(D);
+ if (!NDie)
+ continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie);
+ }
+}
+
+void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
+ DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer);
+ StringRef Name = DT->getName();
+ if (!Name.empty())
+ addString(MemberDie, dwarf::DW_AT_name, Name);
+
+ addType(MemberDie, resolve(DT->getBaseType()));
+
+ addSourceLine(MemberDie, DT);
+
+ if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) {
+
+ // For C++, virtual base classes are not at a fixed offset. Use the following
+ // expression to extract the appropriate offset from the vtable:
+ // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+ DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc;
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits());
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
+ } else {
+ uint64_t Size = DT->getSizeInBits();
+ uint64_t FieldSize = DD->getBaseTypeSize(DT);
+ uint64_t OffsetInBytes;
+
+ bool IsBitfield = FieldSize && Size != FieldSize;
+ if (IsBitfield) {
+ // Handle bitfield, assume bytes are 8 bits.
+ if (DD->useDWARF2Bitfields())
+ addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
+ addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
+
+ uint64_t Offset = DT->getOffsetInBits();
+ uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize;
+ uint64_t AlignMask = ~(Align - 1);
+ // The bits from the start of the storage unit to the start of the field.
+ uint64_t StartBitOffset = Offset - (Offset & AlignMask);
+ // The byte offset of the field's aligned storage unit inside the struct.
+ OffsetInBytes = (Offset - StartBitOffset) / 8;
+
+ if (DD->useDWARF2Bitfields()) {
+ uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+ uint64_t FieldOffset = (HiMark - FieldSize);
+ Offset -= FieldOffset;
+
+ // Maybe we need to work from the other end.
+ if (Asm->getDataLayout().isLittleEndian())
+ Offset = FieldSize - (Offset + Size);
+
+ addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
+ OffsetInBytes = FieldOffset >> 3;
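+ // As a worked example of the DWARF 2 calculation above (little-endian
+ // target): a 5-bit field at bit offset 35 in a 32-bit storage unit gives
+ // HiMark = 64, FieldOffset = 32, DW_AT_bit_offset = 32 - (3 + 5) = 24,
+ // and OffsetInBytes = 4.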
+ } else {
+ addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset);
+ }
+ } else {
+ // This is not a bitfield.
+ OffsetInBytes = DT->getOffsetInBits() / 8;
+ }
+
+ if (DD->getDwarfVersion() <= 2) {
+ DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc;
+ addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
+ } else if (!IsBitfield || DD->useDWARF2Bitfields())
+ addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
+ OffsetInBytes);
+ }
+
+ if (DT->isProtected())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT->isPrivate())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT->isPublic())
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+ if (DT->isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
+ dwarf::DW_VIRTUALITY_virtual);
+
+ // Objective-C properties.
+ if (DINode *PNode = DT->getObjCProperty())
+ if (DIE *PDie = getDIE(PNode))
+ MemberDie.addValue(DIEValueAllocator, dwarf::DW_AT_APPLE_property,
+ dwarf::DW_FORM_ref4, DIEEntry(*PDie));
+
+ if (DT->isArtificial())
+ addFlag(MemberDie, dwarf::DW_AT_artificial);
+}
+
+DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
+ if (!DT)
+ return nullptr;
+
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope()));
+ assert(dwarf::isType(ContextDIE->getTag()) &&
+ "Static member should belong to a type.");
+
+ if (DIE *StaticMemberDIE = getDIE(DT))
+ return StaticMemberDIE;
+
+ DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
+
+ const DIType *Ty = resolve(DT->getBaseType());
+
+ addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
+ addType(StaticMemberDIE, Ty);
+ addSourceLine(StaticMemberDIE, DT);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+ addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+ // FIXME: We could omit private if the parent is a class_type, and
+ // public if the parent is something else.
+ if (DT->isProtected())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_protected);
+ else if (DT->isPrivate())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_private);
+ else if (DT->isPublic())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+ dwarf::DW_ACCESS_public);
+
+ if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant()))
+ addConstantValue(StaticMemberDIE, CI, Ty);
+ if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
+ addConstantFPValue(StaticMemberDIE, CFP);
+
+ return &StaticMemberDIE;
+}
+
+void DwarfUnit::emitHeader(bool UseOffsets) {
+ // Emit size of content not including length itself
+ Asm->OutStreamer->AddComment("Length of Unit");
+ Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
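+ // For example, a DWARF v4 compile unit whose DIE tree occupies 0x95 bytes
+ // emits 0x9C here: 7 header bytes (version, abbrev offset, address size)
+ // plus the DIE size; the 4-byte length field itself is not counted.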
+
+ Asm->OutStreamer->AddComment("DWARF version number");
+ Asm->EmitInt16(DD->getDwarfVersion());
+ Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
+
+ // We share one abbreviations table across all units so it's always at the
+ // start of the section. Use a relocatable offset where needed to ensure
+ // linking doesn't invalidate that offset.
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (UseOffsets)
+ Asm->EmitInt32(0);
+ else
+ Asm->emitDwarfSymbolReference(
+ TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
+
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+}
+
+void DwarfUnit::initSection(MCSection *Section) {
+ assert(!this->Section);
+ this->Section = Section;
+}
+
+void DwarfTypeUnit::emitHeader(bool UseOffsets) {
+ DwarfUnit::emitHeader(UseOffsets);
+ Asm->OutStreamer->AddComment("Type Signature");
+ Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature));
+ Asm->OutStreamer->AddComment("Type DIE Offset");
+ // In a skeleton type unit there is no type DIE so emit a zero offset.
+ Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0,
+ sizeof(Ty->getOffset()));
+}
+
+bool DwarfTypeUnit::isDwoUnit() const {
+ // Since there are no skeleton type units, all type units are dwo type units
+ // when split DWARF is being used.
+ return DD->useSplitDwarf();
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
new file mode 100644
index 000000000000..e225f92116d4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -0,0 +1,391 @@
+//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H
+
+#include "DwarfDebug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+class DwarfCompileUnit;
+
+// Data structure to hold a range for range lists.
+class RangeSpan {
+public:
+ RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
+ const MCSymbol *getStart() const { return Start; }
+ const MCSymbol *getEnd() const { return End; }
+ void setEnd(const MCSymbol *E) { End = E; }
+
+private:
+ const MCSymbol *Start, *End;
+};
+
+class RangeSpanList {
+private:
+ // Index for locating this particular span within the debug_range section.
+ MCSymbol *RangeSym;
+ // List of ranges.
+ SmallVector<RangeSpan, 2> Ranges;
+
+public:
+ RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges)
+ : RangeSym(Sym), Ranges(std::move(Ranges)) {}
+ MCSymbol *getSym() const { return RangeSym; }
+ const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
+ void addRange(RangeSpan Range) { Ranges.push_back(Range); }
+};
+
+//===----------------------------------------------------------------------===//
+/// This dwarf writer support class manages information associated with a
+/// source file.
+class DwarfUnit {
+protected:
+ /// MDNode for the compile unit.
+ const DICompileUnit *CUNode;
+
+ // All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
+
+ /// Unit debug information entry.
+ DIE &UnitDie;
+
+ /// Target of Dwarf emission.
+ AsmPrinter *Asm;
+
+ // Holders for some common dwarf information.
+ DwarfDebug *DD;
+ DwarfFile *DU;
+
+ /// An anonymous type for index type. Owned by UnitDie.
+ DIE *IndexTyDie;
+
+ /// Tracks the mapping of unit level debug information variables to debug
+ /// information entries.
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+ /// A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+ /// A list of all the DIELocs in use.
+ std::vector<DIELoc *> DIELocs;
+
+ /// This map is used to keep track of subprogram DIEs that need
+ /// DW_AT_containing_type attribute. This attribute points to a DIE that
+ /// corresponds to the MDNode mapped with the subprogram DIE.
+ DenseMap<DIE *, const DINode *> ContainingTypeMap;
+
+ /// The section this unit will be emitted in.
+ MCSection *Section;
+
+ DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU);
+
+ bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
+
+public:
+ virtual ~DwarfUnit();
+
+ void initSection(MCSection *Section);
+
+ MCSection *getSection() const {
+ assert(Section);
+ return Section;
+ }
+
+ // Accessors.
+ AsmPrinter* getAsmPrinter() const { return Asm; }
+ uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
+ const DICompileUnit *getCUNode() const { return CUNode; }
+ DIE &getUnitDie() { return UnitDie; }
+
+ /// Return true if this compile unit has something to write out.
+ bool hasContent() const { return UnitDie.hasChildren(); }
+
+ /// Get string containing language specific context for a global name.
+ ///
+ /// Walks the metadata parent chain in a language specific manner (using the
+ /// compile unit language) and returns it as a string. This is done at the
+ /// metadata level because DIEs may not currently have been added to the
+ /// parent context and walking the DIEs looking for names is more expensive
+ /// than walking the metadata.
+ std::string getParentContextString(const DIScope *Context) const;
+
+ /// Add a new global name to the compile unit.
+ virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) {
+ }
+
+ /// Add a new global type to the compile unit.
+ virtual void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {}
+
+ /// Returns the DIE map slot for the specified debug variable.
+ ///
+ /// We delegate the request to DwarfDebug when the MDNode can be part of the
+ /// type system, since DIEs for the type system can be shared across CUs and
+ /// the mappings are kept in DwarfDebug.
+ DIE *getDIE(const DINode *D) const;
+
+ /// Returns a fresh newly allocated DIELoc.
+ DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; }
+
+ /// Insert DIE into the map.
+ ///
+ /// We delegate the request to DwarfDebug when the MDNode can be part of the
+ /// type system, since DIEs for the type system can be shared across CUs and
+ /// the mappings are kept in DwarfDebug.
+ void insertDIE(const DINode *Desc, DIE *D);
+
+ /// Add a flag that is true to the DIE.
+ void addFlag(DIE &Die, dwarf::Attribute Attribute);
+
+ /// Add an unsigned integer attribute data and value.
+ void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, uint64_t Integer);
+
+ void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
+
+ /// Add a signed integer attribute data and value.
+ void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+ Optional<dwarf::Form> Form, int64_t Integer);
+
+ void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
+
+ /// Add a string attribute data and value.
+ ///
+ /// We always emit a reference to the string pool instead of immediate
+ /// strings so that DIEs have more predictable sizes. In the case of split
+ /// dwarf we emit an index into another table which gets us the static offset
+ /// into the string table.
+ void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
+
+ /// Add a Dwarf label attribute data and value.
+ DIEValueList::value_iterator addLabel(DIEValueList &Die,
+ dwarf::Attribute Attribute,
+ dwarf::Form Form,
+ const MCSymbol *Label);
+
+ void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
+
+ /// Add an offset into a section attribute data and value.
+ void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
+
+ /// Add a dwarf op address data and value using the form given and an
+ /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+ void addOpAddress(DIELoc &Die, const MCSymbol *Label);
+
+ /// Add a label delta attribute data and value.
+ void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ const MCSymbol *Lo);
+
+ /// Add a DIE attribute data and value.
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
+
+ /// Add a DIE attribute data and value.
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
+
+ /// Add a type's DW_AT_signature and set the declaration flag.
+ void addDIETypeSignature(DIE &Die, uint64_t Signature);
+ /// Add an attribute containing the type signature for a unique identifier.
+ void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+ StringRef Identifier);
+
+ /// Add block data.
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
+
+ /// Add block data.
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
+
+ /// Add location information to specified debug information entry.
+ void addSourceLine(DIE &Die, unsigned Line, StringRef File,
+ StringRef Directory);
+ void addSourceLine(DIE &Die, const DILocalVariable *V);
+ void addSourceLine(DIE &Die, const DIGlobalVariable *G);
+ void addSourceLine(DIE &Die, const DISubprogram *SP);
+ void addSourceLine(DIE &Die, const DIType *Ty);
+ void addSourceLine(DIE &Die, const DINamespace *NS);
+ void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
+
+ /// Add constant value entry in variable DIE.
+ void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty);
+ void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
+ void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
+
+ /// Add constant value entry in variable DIE.
+ void addConstantFPValue(DIE &Die, const MachineOperand &MO);
+ void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
+
+ /// Add a linkage name, if it isn't empty.
+ void addLinkageName(DIE &Die, StringRef LinkageName);
+
+ /// Add template parameters in buffer.
+ void addTemplateParams(DIE &Buffer, DINodeArray TParams);
+
+ /// Add register operand.
+ /// \returns false if the register does not exist, e.g., because it was never
+ /// materialized.
+ bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
+ unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
+
+ /// Add register offset.
+ /// \returns false if the register does not exist, e.g., because it was never
+ /// materialized.
+ bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
+
+ // FIXME: Should be reformulated in terms of addComplexAddress.
+ /// Start with the address based on the location provided, and generate the
+ /// DWARF information necessary to find the actual Block variable (navigating
+ /// the Block struct) based on the starting location. Add the DWARF
+ /// information to the die. Obsolete, please use addComplexAddress instead.
+ void addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
+ dwarf::Attribute Attribute,
+ const MachineLocation &Location);
+
+ /// Add a new type attribute to the specified entity.
+ ///
+ /// This takes an attribute parameter because DW_AT_friend attributes are
+ /// also type references.
+ void addType(DIE &Entity, const DIType *Ty,
+ dwarf::Attribute Attribute = dwarf::DW_AT_type);
+
+ DIE *getOrCreateNameSpace(const DINamespace *NS);
+ DIE *getOrCreateModule(const DIModule *M);
+ DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
+
+ void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
+ bool Minimal = false);
+
+ /// Find existing DIE or create new DIE for the given type.
+ DIE *getOrCreateTypeDIE(const MDNode *N);
+
+ /// Create a DIE for the given composite type.
+ DIE *createTypeDIE(const DICompositeType *Ty);
+
+ /// Get context owner's DIE.
+ DIE *getOrCreateContextDIE(const DIScope *Context);
+
+ /// Construct DIEs for types that contain vtables.
+ void constructContainingTypeDIEs();
+
+ /// Construct function argument DIEs.
+ void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args);
+
+ /// Create a DIE with the given Tag, add the DIE to its parent, and
+ /// call insertDIE if N is not null.
+ DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr);
+
+ /// Compute the size of a header for this unit, not including the initial
+ /// length field.
+ virtual unsigned getHeaderSize() const {
+ return sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+ }
+
+ /// Emit the header for this unit, not including the initial length field.
+ virtual void emitHeader(bool UseOffsets);
+
+ virtual DwarfCompileUnit &getCU() = 0;
+
+ void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+
+protected:
+ /// Create new static data member DIE.
+ DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
+
+ /// Look up the source ID with the given directory and source file names. If
+ /// none currently exists, create a new ID and insert it in the line table.
+ virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
+
+ /// Look in the DwarfDebug map for the MDNode that corresponds to the
+ /// reference.
+ template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
+ return Ref.resolve();
+ }
+
+private:
+ void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
+ void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
+ void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy);
+ void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);
+ void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy);
+ void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
+ void constructTemplateTypeParameterDIE(DIE &Buffer,
+ const DITemplateTypeParameter *TP);
+ void constructTemplateValueParameterDIE(DIE &Buffer,
+ const DITemplateValueParameter *TVP);
+
+ /// Return the default lower bound for an array.
+ ///
+ /// If the DWARF version doesn't handle the language, return -1.
+ int64_t getDefaultLowerBound() const;
+
+ /// Get an anonymous type for index type.
+ DIE *getIndexTyDie();
+
+ /// Set D as anonymous type for index which can be reused later.
+ void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
+ virtual bool isDwoUnit() const = 0;
+};
+
+class DwarfTypeUnit : public DwarfUnit {
+ uint64_t TypeSignature;
+ const DIE *Ty;
+ DwarfCompileUnit &CU;
+ MCDwarfDwoLineTable *SplitLineTable;
+
+ unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
+ bool isDwoUnit() const override;
+
+public:
+ DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW,
+ DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr);
+
+ void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
+ void setType(const DIE *Ty) { this->Ty = Ty; }
+
+ /// Emit the header for this unit, not including the initial length field.
+ void emitHeader(bool UseOffsets) override;
+ unsigned getHeaderSize() const override {
+ return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
+ sizeof(uint32_t); // Type DIE Offset
+ }
+ DwarfCompileUnit &getCU() override { return CU; }
+};
+} // end llvm namespace
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
new file mode 100644
index 000000000000..e24dcb1bffd4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -0,0 +1,689 @@
+//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EHStreamer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+
+EHStreamer::~EHStreamer() {}
+
+/// How many leading type ids two landing pads have in common.
+unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
+ const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+ unsigned LSize = LIds.size(), RSize = RIds.size();
+ unsigned MinSize = LSize < RSize ? LSize : RSize;
+ unsigned Count = 0;
+
+ for (; Count != MinSize; ++Count)
+ if (LIds[Count] != RIds[Count])
+ return Count;
+
+ return Count;
+}
+
+/// Compute the actions table and gather the first action index for each landing
+/// pad site.
+unsigned EHStreamer::
+computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions) {
+
+ // The action table follows the call-site table in the LSDA. The individual
+ // records are of two types:
+ //
+ // * Catch clause
+ // * Exception specification
+ //
+ // The two record kinds have the same format, with only small differences.
+ // They are distinguished by the "switch value" field: Catch clauses
+ // (TypeInfos) have strictly positive switch values, and exception
+ // specifications (FilterIds) have strictly negative switch values. Value 0
+ // indicates a catch-all clause.
+ //
+ // Negative type IDs index into FilterIds. Positive type IDs index into
+ // TypeInfos. The value written for a positive type ID is just the type ID
+ // itself. For a negative type ID, however, the value written is the
+ // (negative) byte offset of the corresponding FilterIds entry. The byte
+ // offset is usually equal to the type ID (because the FilterIds entries are
+ // written using a variable width encoding, which outputs one byte per entry
+ // as long as the value written is not too large) but can differ. This kind
+ // of complication does not occur for positive type IDs because type infos are
+ // output using a fixed width encoding. FilterOffsets[i] holds the byte
+ // offset corresponding to FilterIds[i].
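+ //
+ // For example, FilterIds of {300, 1} yield FilterOffsets of {-1, -3}: the
+ // first id needs two ULEB128 bytes, so the second entry's byte offset (-3)
+ // no longer equals its type ID (-2).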
+
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ SmallVector<int, 16> FilterOffsets;
+ FilterOffsets.reserve(FilterIds.size());
+ int Offset = -1;
+
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+ FilterOffsets.push_back(Offset);
+ Offset -= getULEB128Size(*I);
+ }
+
+ FirstActions.reserve(LandingPads.size());
+
+ int FirstAction = 0;
+ unsigned SizeActions = 0;
+ const LandingPadInfo *PrevLPI = nullptr;
+
+ for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+ I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+ const LandingPadInfo *LPI = *I;
+ const std::vector<int> &TypeIds = LPI->TypeIds;
+ unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
+ unsigned SizeSiteActions = 0;
+
+ if (NumShared < TypeIds.size()) {
+ unsigned SizeAction = 0;
+ unsigned PrevAction = (unsigned)-1;
+
+ if (NumShared) {
+ unsigned SizePrevIds = PrevLPI->TypeIds.size();
+ assert(Actions.size());
+ PrevAction = Actions.size() - 1;
+ SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) +
+ getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+ for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+ assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+ SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+ SizeAction += -Actions[PrevAction].NextAction;
+ PrevAction = Actions[PrevAction].Previous;
+ }
+ }
+
+ // Compute the actions.
+ for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+ int TypeID = TypeIds[J];
+ assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+ int ValueForTypeID =
+ isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID;
+ unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
+
+ int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+ SizeAction = SizeTypeID + getSLEB128Size(NextAction);
+ SizeSiteActions += SizeAction;
+
+ ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+ Actions.push_back(Action);
+ PrevAction = Actions.size() - 1;
+ }
+
+ // Record the first action of the landing pad site.
+ FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+ } // else identical - re-use previous FirstAction
+
+ // Information used when creating the call-site table. The action record
+ // field of the call site record is the offset of the first associated
+ // action record, relative to the start of the actions table. This value is
+ // biased by 1 (1 indicating the start of the actions table), and 0
+ // indicates that there are no actions.
+ FirstActions.push_back(FirstAction);
+
+ // Compute this site's contribution to the size.
+ SizeActions += SizeSiteActions;
+
+ PrevLPI = LPI;
+ }
+
+ return SizeActions;
+}
+
+/// Return `true' if this is a call to a function marked `nounwind'. Return
+/// `false' otherwise.
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (!MO.isGlobal()) continue;
+
+ const Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+
+ return MarkedNoUnwind;
+}
+
+void EHStreamer::computePadMap(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap) {
+ // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+ // by try-range labels when lowered). Ordinary calls do not, so appropriate
+ // try-ranges for them need to be deduced so we can put them in the LSDA.
+ for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+ const LandingPadInfo *LandingPad = LandingPads[i];
+ for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+ MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+ assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+ PadRange P = { i, j };
+ PadMap[BeginLabel] = P;
+ }
+ }
+}
+
+/// Compute the call-site table. The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action. The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action. Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table. Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
+ RangeMapType PadMap;
+ computePadMap(LandingPads, PadMap);
+
+ // The end label of the previous invoke or nounwind try-range.
+ MCSymbol *LastLabel = nullptr;
+
+ // Whether there is a potentially throwing instruction (currently this means
+ // an ordinary call) between the end of the previous try-range and now.
+ bool SawPotentiallyThrowing = false;
+
+ // Whether the last CallSite entry was for an invoke.
+ bool PreviousIsInvoke = false;
+
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+ // Visit all instructions in order of address.
+ for (const auto &MBB : *Asm->MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isEHLabel()) {
+ if (MI.isCall())
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
+ continue;
+ }
+
+ // End of the previous try-range?
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
+ if (BeginLabel == LastLabel)
+ SawPotentiallyThrowing = false;
+
+ // Beginning of a new try-range?
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+ if (L == PadMap.end())
+ // Nope, it was just some random label.
+ continue;
+
+ const PadRange &P = L->second;
+ const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+ assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+ "Inconsistent landing pad map!");
+
+ // For DWARF exception handling (SjLj handling doesn't use this): if some
+ // instruction between the previous try-range and this one may throw,
+ // create a call-site entry with no landing pad for the region between the
+ // try-ranges.
+ if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) {
+ CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
+ CallSites.push_back(Site);
+ PreviousIsInvoke = false;
+ }
+
+ LastLabel = LandingPad->EndLabels[P.RangeIndex];
+ assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
+ // This try-range is for an invoke.
+ CallSiteEntry Site = {
+ BeginLabel,
+ LastLabel,
+ LandingPad,
+ FirstActions[P.PadIndex]
+ };
+
+ // Try to merge with the previous call-site. SJLJ doesn't do this.
+ if (PreviousIsInvoke && !IsSJLJ) {
+ CallSiteEntry &Prev = CallSites.back();
+ if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) {
+ // Extend the range of the previous entry.
+ Prev.EndLabel = Site.EndLabel;
+ continue;
+ }
+ }
+
+ // Otherwise, create a new call-site.
+ if (!IsSJLJ)
+ CallSites.push_back(Site);
+ else {
+ // SjLj EH must maintain the call sites in the order assigned
+ // to them by the SjLjPrepare pass.
+ unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ if (CallSites.size() < SiteNo)
+ CallSites.resize(SiteNo);
+ CallSites[SiteNo - 1] = Site;
+ }
+ PreviousIsInvoke = true;
+ }
+ }
+ }
+
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for the
+ // region following the try-range.
+ if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) {
+ CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
+ CallSites.push_back(Site);
+ }
+}
+
+/// Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts are
+/// easy. First there is a header which describes the location and organization
+/// of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+/// the try. In our case it's an accumulation of the ranges covered by the
+/// invokes in the try. There is also a reference to the landing pad that
+/// handles the exception once processed. Finally an index into the actions
+/// table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+/// action offset. Starting with the action index from the landing pad
+/// site, each type ID is checked for a match to the current exception. If
+/// it matches then the exception and type id are passed on to the landing
+/// pad. Otherwise the next action is looked up. This chain is terminated
+/// with a next action of zero. If no type id is found then the frame is
+/// unwound and handling continues.
+/// 3. Type ID table contains references to all the C++ typeinfo for all
+///    catches in the function. This table is reverse indexed, base 1.
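+///
+/// For instance, a landing pad with two catch clauses yields two chained
+/// action records: the call site's action index names one of them, its
+/// next-action displacement links to the other, and a next-action of zero
+/// ends the chain.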
+void EHStreamer::emitExceptionTable() {
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+ // Sort the landing pads in order of their type ids. This is used to fold
+ // duplicate actions.
+ SmallVector<const LandingPadInfo *, 64> LandingPads;
+ LandingPads.reserve(PadInfos.size());
+
+ for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+ LandingPads.push_back(&PadInfos[i]);
+
+ // Order landing pads lexicographically by type id.
+ std::sort(LandingPads.begin(), LandingPads.end(),
+ [](const LandingPadInfo *L,
+ const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
+
+ // Compute the actions table and gather the first action index for each
+ // landing pad site.
+ SmallVector<ActionEntry, 32> Actions;
+ SmallVector<unsigned, 64> FirstActions;
+ unsigned SizeActions =
+ computeActionsTable(LandingPads, Actions, FirstActions);
+
+ // Compute the call-site table.
+ SmallVector<CallSiteEntry, 64> CallSites;
+ computeCallSiteTable(CallSites, LandingPads, FirstActions);
+
+ // Final tallies.
+
+ // Call sites.
+ bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+ bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+ unsigned CallSiteTableLength;
+ if (IsSJLJ)
+ CallSiteTableLength = 0;
+ else {
+ unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+ unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+ CallSiteTableLength =
+ CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+ }
+
+ for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+ CallSiteTableLength += getULEB128Size(CallSites[i].Action);
+ if (IsSJLJ)
+ CallSiteTableLength += getULEB128Size(i);
+ }
+
+ // Type infos.
+ MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ unsigned TTypeEncoding;
+ unsigned TypeFormatSize;
+
+ if (!HaveTTData) {
+ // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+ // that we're omitting that bit.
+ TTypeEncoding = dwarf::DW_EH_PE_omit;
+ // dwarf::DW_EH_PE_absptr
+ TypeFormatSize = Asm->getDataLayout().getPointerSize();
+ } else {
+ // Okay, we have actual filters or typeinfos to emit. As such, we need to
+ // pick a type encoding for them. We're about to emit a list of pointers to
+ // typeinfo objects at the end of the LSDA. However, unless we're in static
+ // mode, this reference will require a relocation by the dynamic linker.
+ //
+ // Because of this, we have a couple of options:
+ //
+ // 1) If we are in -static mode, we can always use an absolute reference
+ // from the LSDA, because the static linker will resolve it.
+ //
+ // 2) Otherwise, if the LSDA section is writable, we can output the direct
+ // reference to the typeinfo and allow the dynamic linker to relocate
+ // it. Since it is in a writable section, the dynamic linker won't
+ // have a problem.
+ //
+ //    3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+ // we need to use some form of indirection. For example, on Darwin,
+ // we can output a statically-relocatable reference to a dyld stub. The
+ // offset to the stub is constant, but the contents are in a section
+ // that is updated by the dynamic linker. This is easy enough, but we
+ // need to tell the personality function of the unwinder to indirect
+ // through the dyld stub.
+ //
+ // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+ // somewhere. This predicate should be moved to a shared location that is
+ // in target-independent code.
+ //
+ TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+ TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+ }
+
+ // Begin the exception table.
+ // Sometimes we do not want to emit the data into a separate section (e.g. ARM
+ // EHABI). In this case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer->SwitchSection(LSDASection);
+ Asm->EmitAlignment(2);
+
+ // Emit the LSDA.
+ MCSymbol *GCCETSym =
+ Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+
+ Twine(Asm->getFunctionNumber()));
+ Asm->OutStreamer->EmitLabel(GCCETSym);
+ Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym());
+
+ // Emit the LSDA header.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+ // The type infos need to be aligned. GCC does this by inserting padding just
+ // before the type infos. However, this changes the size of the exception
+ // table, so you need to take this into account when you output the exception
+ // table size. However, the size is output using a variable length encoding.
+ // So by increasing the size by inserting padding, you may increase the number
+ // of bytes used for writing the size. If it increases, say by one byte, then
+ // you now need to output one less byte of padding to get the type infos
+ // aligned. However this decreases the size of the exception table. This
+ // changes the value you have to output for the exception table size. Due to
+ // the variable length encoding, the number of bytes used for writing the
+ // length may decrease. If so, you then have to increase the amount of
+ // padding. And so on. If you look carefully at the GCC code you will see that
+ // it indeed does this in a loop, going on and on until the values stabilize.
+ // We chose another solution: don't output padding inside the table like GCC
+ // does, instead output it before the table.
+ unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+ unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
+ unsigned SizeAlign = (4 - TotalSize) & 3;
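+ // For example, a TotalSize of 10 gives SizeAlign = (4 - 10) & 3 = 2; those
+ // two pad bytes are absorbed into the ULEB128 encoding of the TType base
+ // offset below (or of the call site table length if there is no TType
+ // data), padding the table so the type infos at the end land on a 4-byte
+ // boundary.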
+
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
+ }
+
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ // SjLj Exception handling
+ if (IsSJLJ) {
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ // Emit the landing pad site information.
+ unsigned idx = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+ const CallSiteEntry &S = *I;
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (VerboseAsm) {
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx));
+ }
+ Asm->EmitULEB128(idx);
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer->AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer->AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ } else {
+ // Itanium LSDA exception handling
+
+ // The call-site table is a list of all call sites that may throw an
+ // exception (including C++ 'throw' statements) in the procedure
+ // fragment. It immediately follows the LSDA header. Each entry indicates,
+ // for a given call, the first corresponding action record and corresponding
+ // landing pad.
+ //
+ // The table begins with the number of bytes, stored as an LEB128
+ // compressed, unsigned integer. The records immediately follow the record
+ // count. They are sorted in increasing call-site address. Each record
+ // indicates:
+ //
+ // * The position of the call-site.
+ // * The position of the landing pad.
+ // * The first action record for that call site.
+ //
+ // A missing entry in the call-site table indicates that a call is not
+ // supposed to throw.
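+ //
+ // For example, an invoke whose try-range spans [Begin, End) and lands at
+ // LPad with first action 1 is emitted as four fields: udata4 Begin-Start,
+ // udata4 End-Begin, udata4 LPad-Start, ULEB128 1, where Start is the
+ // beginning of the procedure fragment.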
+
+ // Emit the landing pad call site table.
+ Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+ unsigned Entry = 0;
+ for (SmallVectorImpl<CallSiteEntry>::const_iterator
+ I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+ const CallSiteEntry &S = *I;
+
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
+
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (!BeginLabel)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (!EndLabel)
+ EndLabel = Asm->getFunctionEnd();
+
+ // Offset of the call site relative to the previous call site, counted in
+ // number of 16-byte bundles. The first call site is counted relative to
+ // the start of the procedure fragment.
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
+ Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+ // Offset of the landing pad, counted in 16-byte bundles relative to the
+ // @LPStart address.
+ if (!S.LPad) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(" has no landing pad");
+ Asm->OutStreamer->EmitIntValue(0, 4/*size*/);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(Twine(" jumps to ") +
+ S.LPad->LandingPadLabel->getName());
+ Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4);
+ }
+
+ // Offset of the first associated action record, relative to the start of
+ // the action table. This value is biased by 1 (1 indicates the start of
+ // the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer->AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer->AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->EmitULEB128(S.Action);
+ }
+ }
+
+ // Emit the Action Table.
+ int Entry = 0;
+ for (SmallVectorImpl<ActionEntry>::const_iterator
+ I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+ const ActionEntry &Action = *I;
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<");
+ }
+
+ // Type Filter
+ //
+ // Used by the runtime to match the type of the thrown exception to the
+ // type of the catch clauses or the types in the exception specification.
+ if (VerboseAsm) {
+ if (Action.ValueForTypeID > 0)
+ Asm->OutStreamer->AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else if (Action.ValueForTypeID < 0)
+ Asm->OutStreamer->AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer->AddComment(" Cleanup");
+ }
+ Asm->EmitSLEB128(Action.ValueForTypeID);
+
+ // Action Record
+ //
+ // Self-relative signed displacement in bytes of the next action record,
+ // or 0 if there is no next action record.
+ if (VerboseAsm) {
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer->AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction));
+ }
+ }
+ Asm->EmitSLEB128(Action.NextAction);
+ }
+
+ emitTypeInfos(TTypeEncoding);
+
+ Asm->EmitAlignment(2);
+}
+
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+
+ bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ int Entry = 0;
+ // Emit the Catch TypeInfos.
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
+ Entry = TypeInfos.size();
+ }
+
+ for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
+ TypeInfos.rend())) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
+ Asm->EmitTTypeReference(GV, TTypeEncoding);
+ }
+
+ // Emit the Exception Specifications.
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
+ Asm->OutStreamer->AddBlankLine();
+ Entry = 0;
+ }
+ for (std::vector<unsigned>::const_iterator
+ I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+ unsigned TypeID = *I;
+ if (VerboseAsm) {
+ --Entry;
+ if (isFilterEHSelector(TypeID))
+ Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
new file mode 100644
index 000000000000..080fdd14b467
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -0,0 +1,137 @@
+//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H
+
+#include "AsmPrinterHandler.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineInstr;
+class MachineFunction;
+class MCSymbol;
+class MCSymbolRefExpr;
+
+template <typename T>
+class SmallVectorImpl;
+
+/// Emits exception handling directives.
+class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler {
+protected:
+ /// Target of directive emission.
+ AsmPrinter *Asm;
+
+ /// Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// How many leading type ids two landing pads have in common.
+ static unsigned sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // Null indicates the start of the function.
+ MCSymbol *EndLabel; // Null indicates the end of the function.
+
+ // LPad contains the landing pad start labels.
+ const LandingPadInfo *LPad; // Null indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// Compute the actions table and gather the first action index for each
+ /// landing pad site.
+ unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ RangeMapType &PadMap);
+
+ /// Compute the call-site table. The entry for an invoke has a try-range
+ /// containing the call, a non-zero landing pad and an appropriate action.
+ /// The entry for an ordinary call has a try-range containing the call and
+ /// zero for the landing pad and the action. Calls marked 'nounwind' have
+ /// no entry and must not be contained in the try-range of any entry - they
+ /// form gaps in the table. Entries must be ordered by try-range address.
+ void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+ ///        catches in the function. This table is reverse indexed, base 1.
+ void emitExceptionTable();
+
+ virtual void emitTypeInfos(unsigned TTypeEncoding);
+
+ // Helpers for identifying what kind of clause an EH typeid or selector
+ // corresponds to. Negative selectors are for filter clauses, the zero
+ // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; }
+ static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
+ static bool isCatchEHSelector(int Selector) { return Selector > 0; }
+
+public:
+ EHStreamer(AsmPrinter *A);
+ ~EHStreamer() override;
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+
+ /// Return `true' if this is a call to a function marked `nounwind'. Return
+ /// `false' otherwise.
+ static bool callToNoUnwindFunction(const MachineInstr *MI);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 000000000000..6a023b998b32
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,123 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class ErlangGCPrinter : public GCMetadataPrinter {
+public:
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
+}
+
+static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
+ X("erlang", "erlang-compatible garbage collector");
+
+void llvm::linkErlangGCPrinter() {}
+
+void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ MCStreamer &OS = *AP.OutStreamer;
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
+
+ // Put this in a custom .note section.
+ OS.SwitchSection(
+ AP.getObjFileLowering().getContext().getELFSection(".note.gc",
+ ELF::SHT_PROGBITS, 0));
+
+ // For each function...
+ for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ FI != IE; ++FI) {
+ GCFunctionInfo &MD = **FI;
+ if (MD.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
+ /** A compact GC layout. Emit this data structure:
+ *
+ * struct {
+ * int16_t PointCount;
+ * void *SafePointAddress[PointCount];
+ * int16_t StackFrameSize; (in words)
+ * int16_t StackArity;
+ * int16_t LiveCount;
+ * int16_t LiveOffsets[LiveCount];
+ * } __gcmap_<FUNCTIONNAME>;
+ **/
+
+ // Align to address width.
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ // Emit PointCount.
+ OS.AddComment("safe point count");
+ AP.EmitInt16(MD.size());
+
+ // And each safe point...
+ for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE;
+ ++PI) {
+ // Emit the address of the safe point.
+ OS.AddComment("safe point address");
+ MCSymbol *Label = PI->Label;
+ AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/);
+ }
+
+ // Stack information never changes in safe points! Only print info from the
+ // first call-site.
+ GCFunctionInfo::iterator PI = MD.begin();
+
+ // Emit the stack frame size.
+ OS.AddComment("stack frame size (in words)");
+ AP.EmitInt16(MD.getFrameSize() / IntPtrSize);
+
+ // Emit stack arity, i.e. the number of stacked arguments.
+ unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+ unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs
+ ? MD.getFunction().arg_size() - RegisteredArgs
+ : 0;
+ OS.AddComment("stack arity");
+ AP.EmitInt16(StackArity);
+
+ // Emit the number of live roots in the function.
+ OS.AddComment("live root count");
+ AP.EmitInt16(MD.live_size(PI));
+
+ // And for each live root...
+ for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+ LE = MD.live_end(PI);
+ LI != LE; ++LI) {
+ // Emit live root's offset within the stack frame.
+ OS.AddComment("stack index (offset / wordsize)");
+ AP.EmitInt16(LI->StackOffset / IntPtrSize);
+ }
+ }
+}
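+
+// Illustrative only: a minimal host-side sketch of how one record of the
+// layout emitted above could be decoded. The helper and struct names are
+// hypothetical (this is not part of the emitter or of the Erlang/OTP
+// runtime); it assumes a little-endian target, that the caller has located
+// the start of a record in the .note.gc payload, and that safe point
+// addresses are the 4-byte values produced by EmitLabelPlusOffset above.
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+struct ErlangGCMapRecord {
+  uint16_t PointCount;
+  std::vector<uint32_t> SafePointAddrs; // 4-byte safe point labels
+  uint16_t FrameSizeWords;              // stack frame size, in words
+  uint16_t StackArity;                  // number of stacked arguments
+  std::vector<uint16_t> LiveOffsets;    // stack index = offset / wordsize
+};
+
+static const uint8_t *decodeErlangGCMapRecord(const uint8_t *P,
+                                              ErlangGCMapRecord &R) {
+  auto Read16 = [&P] { uint16_t V; std::memcpy(&V, P, 2); P += 2; return V; };
+  auto Read32 = [&P] { uint32_t V; std::memcpy(&V, P, 4); P += 4; return V; };
+  R.PointCount = Read16();
+  for (unsigned I = 0; I != R.PointCount; ++I)
+    R.SafePointAddrs.push_back(Read32());
+  R.FrameSizeWords = Read16();
+  R.StackArity = Read16();
+  uint16_t LiveCount = Read16();
+  for (unsigned I = 0; I != LiveCount; ++I)
+    R.LiveOffsets.push_back(Read16());
+  return P; // the next record starts after re-aligning to the pointer width
+}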
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 000000000000..c09ef6adea69
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,182 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cctype>
+using namespace llvm;
+
+namespace {
+
+class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+public:
+ void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+ void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
+};
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+ Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() {}
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+ const std::string &MId = M.getModuleIdentifier();
+
+ std::string SymName;
+ SymName += "caml";
+ size_t Letter = SymName.size();
+ SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName += "__";
+ SymName += Id;
+
+ // Capitalize the first letter of the module name.
+ SymName[Letter] = toupper(SymName[Letter]);
+
+ SmallString<128> TmpStr;
+ Mangler::getNameWithPrefix(TmpStr, SymName, M.getDataLayout());
+
+ MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr);
+
+ AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
+ AP.OutStreamer->EmitLabel(Sym);
+}
+
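+// Worked example (illustrative; the helper below is hypothetical and unused
+// by the emitter): for a module identifier of "my_module.ml" and an Id of
+// "frametable", EmitCamlGlobal above constructs "camlMy_module__frametable"
+// and then applies the target's mangling prefix. The same construction as a
+// standalone function:
+static std::string camlSymbolNameSketch(const std::string &MId,
+                                        const char *Id) {
+  std::string S = "caml";
+  size_t Letter = S.size();
+  S.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+  S += "__";
+  S += Id;
+  S[Letter] = toupper(S[Letter]); // capitalize the module name, as above
+  return S;
+}
+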
+void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(M, AP, "code_begin");
+
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(M, AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+/// extern "C" struct align(sizeof(intptr_t)) {
+/// uint16_t NumDescriptors;
+/// struct align(sizeof(intptr_t)) {
+/// void *ReturnAddress;
+/// uint16_t FrameSize;
+/// uint16_t NumLiveOffsets;
+/// uint16_t LiveOffsets[NumLiveOffsets];
+/// } Descriptors[NumDescriptors];
+/// } caml${module}__frametable;
+///
+/// Note that this precludes programs from using stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). The frametable printer will
+/// abort if either condition is detected in a function which uses the GC.
+///
+void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
+ unsigned IntPtrSize = M.getDataLayout().getPointerSize();
+
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
+ EmitCamlGlobal(M, AP, "code_end");
+
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(M, AP, "data_end");
+
+ // FIXME: Why does ocaml emit this??
+ AP.OutStreamer->EmitIntValue(0, IntPtrSize);
+
+ AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ EmitCamlGlobal(M, AP, "frametable");
+
+ int NumDescriptors = 0;
+ for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ if (FI.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1 << 16) {
+ // Very rude!
+ report_fatal_error(" Too much descriptor for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+ for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ if (FI.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
+
+ uint64_t FrameSize = FI.getFrameSize();
+ if (FrameSize >= 1 << 16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Frame size " +
+                         Twine(FrameSize) + " >= 65536.\n"
+ "(" +
+ Twine(uintptr_t(&FI)) + ")");
+ }
+
+ AP.OutStreamer->AddComment("live roots for " +
+ Twine(FI.getFunction().getName()));
+ AP.OutStreamer->AddBlankLine();
+
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ size_t LiveCount = FI.live_size(J);
+ if (LiveCount >= 1 << 16) {
+ // Very rude!
+ report_fatal_error("Function '" + FI.getFunction().getName() +
+ "' is too large for the ocaml GC! "
+ "Live root count " +
+ Twine(LiveCount) + " >= 65536.");
+ }
+
+ AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize);
+ AP.EmitInt16(FrameSize);
+ AP.EmitInt16(LiveCount);
+
+ for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+ KE = FI.live_end(J);
+ K != KE; ++K) {
+ if (K->StackOffset >= 1 << 16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
+ }
+
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
new file mode 100644
index 000000000000..e5933d8d4160
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -0,0 +1,1273 @@
+//===-- CodeGen/AsmPrinter/WinException.cpp - Windows Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Win64 exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WinException.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCWin64EH.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
+ // MSVC's EH tables are always composed of 32-bit words. All known 64-bit
+ // platforms use an imagerel32 relocation to refer to symbols.
+ useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
+}
+
+WinException::~WinException() {}
+
+/// endModule - Emit all exception information that should come after the
+/// content.
+void WinException::endModule() {
+ auto &OS = *Asm->OutStreamer;
+ const Module *M = MMI->getModule();
+ for (const Function &F : *M)
+ if (F.hasFnAttribute("safeseh"))
+ OS.EmitCOFFSafeSEH(Asm->getSymbol(&F));
+}
+
+void WinException::beginFunction(const MachineFunction *MF) {
+ shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
+
+ // If any landing pads survive, we need an EH table.
+ bool hasLandingPads = !MMI->getLandingPads().empty();
+ bool hasEHFunclets = MMI->hasEHFunclets();
+
+ const Function *F = MF->getFunction();
+
+ shouldEmitMoves = Asm->needsSEHMoves();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ const Function *Per = nullptr;
+ if (F->hasPersonalityFn())
+ Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+
+ bool forceEmitPersonality =
+ F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
+ F->needsUnwindTableEntry();
+
+ shouldEmitPersonality =
+ forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
+ PerEncoding != dwarf::DW_EH_PE_omit && Per);
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ shouldEmitLSDA = shouldEmitPersonality &&
+ LSDAEncoding != dwarf::DW_EH_PE_omit;
+
+ // If we're not using CFI, we don't want the CFI or the personality, but we
+ // might want EH tables if we had EH pads.
+ if (!Asm->MAI->usesWindowsCFI()) {
+ shouldEmitLSDA = hasEHFunclets;
+ shouldEmitPersonality = false;
+ return;
+ }
+
+ beginFunclet(MF->front(), Asm->CurrentFnSym);
+}
+
+/// endFunction - Gather and emit post-function exception information.
+///
+void WinException::endFunction(const MachineFunction *MF) {
+ if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
+ return;
+
+ const Function *F = MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // Get rid of any dead landing pads if we're not using funclets. In funclet
+ // schemes, the landing pad is not actually reachable. It only exists so
+ // that we can emit the right table data.
+ if (!isFuncletEHPersonality(Per))
+ MMI->TidyLandingPads();
+
+ endFunclet();
+
+ // endFunclet will emit the necessary .xdata tables for x64 SEH.
+ if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets())
+ return;
+
+ if (shouldEmitPersonality || shouldEmitLSDA) {
+ Asm->OutStreamer->PushSection();
+
+ // Just switch sections to the right xdata section.
+ MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
+ Asm->OutStreamer->getCurrentSectionOnly());
+ Asm->OutStreamer->SwitchSection(XData);
+
+ // Emit the tables appropriate to the personality function in use. If we
+ // don't recognize the personality, assume it uses an Itanium-style LSDA.
+ if (Per == EHPersonality::MSVC_Win64SEH)
+ emitCSpecificHandlerTable(MF);
+ else if (Per == EHPersonality::MSVC_X86SEH)
+ emitExceptHandlerTable(MF);
+ else if (Per == EHPersonality::MSVC_CXX)
+ emitCXXFrameHandler3Table(MF);
+ else if (Per == EHPersonality::CoreCLR)
+ emitCLRExceptionTable(MF);
+ else
+ emitExceptionTable();
+
+ Asm->OutStreamer->PopSection();
+ }
+}
+
+/// Retrieve the MCSymbol for a funclet entry MachineBasicBlock.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB) {
+ if (!MBB)
+ return nullptr;
+
+ assert(MBB->isEHFuncletEntry());
+
+ // Give catches and cleanups a name based off of their parent function and
+ // their funclet entry block's number.
+ const MachineFunction *MF = MBB->getParent();
+ const Function *F = MF->getFunction();
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCContext &Ctx = MF->getContext();
+ StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+ return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+ Twine(MBB->getNumber()) + "@?0?" +
+ FuncLinkageName + "@4HA");
+}
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+ MCSymbol *Sym) {
+ CurrentFuncletEntry = &MBB;
+
+ const Function *F = Asm->MF->getFunction();
+ // If a symbol was not provided for the funclet, invent one.
+ if (!Sym) {
+ Sym = getMCSymbolForMBB(Asm, &MBB);
+
+ // Describe our funclet symbol as a function with internal linkage.
+ Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
+ Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
+ Asm->OutStreamer->EndCOFFSymbolDef();
+
+ // We want our funclet's entry point to be aligned such that no nops will be
+ // present after the label.
+ Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+ F);
+
+ // Now that we've emitted the alignment directive, point at our funclet.
+ Asm->OutStreamer->EmitLabel(Sym);
+ }
+
+ // Mark 'Sym' as starting our funclet.
+ if (shouldEmitMoves || shouldEmitPersonality)
+ Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+
+ if (shouldEmitPersonality) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const Function *PerFn = nullptr;
+
+ // Determine which personality routine we are using for this funclet.
+ if (F->hasPersonalityFn())
+ PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
+
+ // Classify the personality routine so that we may reason about it.
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
+ if (Per != EHPersonality::MSVC_CXX ||
+ !CurrentFuncletEntry->isCleanupFuncletEntry())
+ Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ }
+}
+
+void WinException::endFunclet() {
+ // No funclet to process? Great, we have nothing to do.
+ if (!CurrentFuncletEntry)
+ return;
+
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ const Function *F = Asm->MF->getFunction();
+ EHPersonality Per = EHPersonality::Unknown;
+ if (F->hasPersonalityFn())
+ Per = classifyEHPersonality(F->getPersonalityFn());
+
+ // The .seh_handlerdata directive implicitly switches section, push the
+ // current section so that we may return to it.
+ Asm->OutStreamer->PushSection();
+
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
+ if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
+ !CurrentFuncletEntry->isCleanupFuncletEntry()) {
+ // If this is a C++ catch funclet (or the parent function),
+ // emit a reference to the LSDA for the parent function.
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$cppxdata$", FuncLinkageName));
+ Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
+ } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() &&
+ !CurrentFuncletEntry->isEHFuncletEntry()) {
+ // If this is the parent function in Win64 SEH, emit the LSDA immediately
+ // following .seh_handlerdata.
+ emitCSpecificHandlerTable(Asm->MF);
+ }
+
+ // Switch back to the previous section now that we are done writing to
+ // .xdata.
+ Asm->OutStreamer->PopSection();
+
+ // Emit a .seh_endproc directive to mark the end of the function.
+ Asm->OutStreamer->EmitWinCFIEndProc();
+ }
+
+ // Let's make sure we don't try to end the same funclet twice.
+ CurrentFuncletEntry = nullptr;
+}
+
+const MCExpr *WinException::create32bitRef(const MCSymbol *Value) {
+ if (!Value)
+ return MCConstantExpr::create(0, Asm->OutContext);
+ return MCSymbolRefExpr::create(Value, useImageRel32
+ ? MCSymbolRefExpr::VK_COFF_IMGREL32
+ : MCSymbolRefExpr::VK_None,
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
+ if (!GV)
+ return MCConstantExpr::create(0, Asm->OutContext);
+ return create32bitRef(Asm->getSymbol(GV));
+}
+
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(create32bitRef(Label),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(OffsetOf, Asm->OutContext),
+ MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext);
+}
+
+const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom) {
+ return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom),
+ MCConstantExpr::create(1, Asm->OutContext),
+ Asm->OutContext);
+}
+
+int WinException::getFrameIndexOffset(int FrameIndex,
+ const WinEHFuncInfo &FuncInfo) {
+ const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
+ unsigned UnusedReg;
+ if (Asm->MAI->usesWindowsCFI()) {
+ int Offset =
+ TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg,
+ /*IgnoreSPUpdates*/ true);
+ assert(UnusedReg ==
+ Asm->MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore());
+ return Offset;
+ }
+
+ // For 32-bit, offsets should be relative to the end of the EH registration
+ // node. For 64-bit, it's relative to SP at the end of the prologue.
+ assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
+ int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
+ Offset += FuncInfo.EHRegNodeEndOffset;
+ return Offset;
+}
+
+namespace {
+
+/// Top-level state used to represent unwind to caller
+const int NullState = -1;
+
+struct InvokeStateChange {
+ /// EH Label immediately after the last invoke in the previous state, or
+ /// nullptr if the previous state was the null state.
+ const MCSymbol *PreviousEndLabel;
+
+ /// EH label immediately before the first invoke in the new state, or nullptr
+ /// if the new state is the null state.
+ const MCSymbol *NewStartLabel;
+
+ /// State of the invoke following NewStartLabel, or NullState to indicate
+ /// the presence of calls which may unwind to caller.
+ int NewState;
+};
+
+/// Iterator that reports all the invoke state changes in a range of machine
+/// basic blocks. Changes to the null state are reported whenever a call that
+/// may unwind to caller is encountered. The MBB range is expected to be an
+/// entire function or funclet, and the start and end of the range are treated
+/// as being in the NullState even if there's not an unwind-to-caller call
+/// before the first invoke or after the last one (i.e., the first state change
+/// reported is the first change to something other than NullState, and a
+/// change back to NullState is always reported at the end of iteration).
+class InvokeStateChangeIterator {
+ InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+ MachineFunction::const_iterator MFI,
+ MachineFunction::const_iterator MFE,
+ MachineBasicBlock::const_iterator MBBI,
+ int BaseState)
+ : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+ LastStateChange.PreviousEndLabel = nullptr;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ scan();
+ }
+
+public:
+ static iterator_range<InvokeStateChangeIterator>
+ range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+ MachineFunction::const_iterator End, int BaseState = NullState) {
+ // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+ // the end of the last block.
+ assert(Begin != End);
+ auto BlockBegin = Begin->begin();
+ auto BlockEnd = std::prev(End)->end();
+ return make_range(
+ InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+ InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+ }
+
+ // Iterator methods.
+ bool operator==(const InvokeStateChangeIterator &O) const {
+ assert(BaseState == O.BaseState);
+ // Must be visiting same block.
+ if (MFI != O.MFI)
+ return false;
+    // Must be visiting same instr.
+ if (MBBI != O.MBBI)
+ return false;
+ // At end of block/instr iteration, we can still have two distinct states:
+ // one to report the final EndLabel, and another indicating the end of the
+ // state change iteration. Check for CurrentEndLabel equality to
+ // distinguish these.
+ return CurrentEndLabel == O.CurrentEndLabel;
+ }
+
+ bool operator!=(const InvokeStateChangeIterator &O) const {
+ return !operator==(O);
+ }
+ InvokeStateChange &operator*() { return LastStateChange; }
+ InvokeStateChange *operator->() { return &LastStateChange; }
+ InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+ InvokeStateChangeIterator &scan();
+
+ const WinEHFuncInfo &EHInfo;
+ const MCSymbol *CurrentEndLabel = nullptr;
+ MachineFunction::const_iterator MFI;
+ MachineFunction::const_iterator MFE;
+ MachineBasicBlock::const_iterator MBBI;
+ InvokeStateChange LastStateChange;
+ bool VisitingInvoke = false;
+ int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+ bool IsNewBlock = false;
+ for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+ if (IsNewBlock)
+ MBBI = MFI->begin();
+ for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ const MachineInstr &MI = *MBBI;
+ if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+ MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+ // Indicate a change of state to the null state. We don't have
+ // start/end EH labels handy but the caller won't expect them for
+ // null state regions.
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ CurrentEndLabel = nullptr;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+
+ // All other state changes are at EH labels before/after invokes.
+ if (!MI.isEHLabel())
+ continue;
+ MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+ if (Label == CurrentEndLabel) {
+ VisitingInvoke = false;
+ continue;
+ }
+ auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+ // Ignore EH labels that aren't the ones inserted before an invoke
+ if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+ continue;
+ auto &StateAndEnd = InvokeMapIter->second;
+ int NewState = StateAndEnd.first;
+ // Keep track of the fact that we're between EH start/end labels so
+      // we know not to treat the invoke we'll see as unwinding to caller.
+ VisitingInvoke = true;
+ if (NewState == LastStateChange.NewState) {
+ // The state isn't actually changing here. Record the new end and
+ // keep going.
+ CurrentEndLabel = StateAndEnd.second;
+ continue;
+ }
+ // Found a state change to report
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = Label;
+ LastStateChange.NewState = NewState;
+ // Start keeping track of the new current end
+ CurrentEndLabel = StateAndEnd.second;
+ // Don't re-visit this instr on the next scan
+ ++MBBI;
+ return *this;
+ }
+ }
+ // Iteration hit the end of the block range.
+ if (LastStateChange.NewState != BaseState) {
+ // Report the end of the last new state
+ LastStateChange.PreviousEndLabel = CurrentEndLabel;
+ LastStateChange.NewStartLabel = nullptr;
+ LastStateChange.NewState = BaseState;
+ // Leave CurrentEndLabel non-null to distinguish this state from end.
+ assert(CurrentEndLabel != nullptr);
+ return *this;
+ }
+ // We've reported all state changes and hit the end state.
+ CurrentEndLabel = nullptr;
+ return *this;
+}
+
+/// Emit the language-specific data that __C_specific_handler expects. This
+/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
+/// up after faults with __try, __except, and __finally. The typeinfo values
+/// are not really RTTI data, but pointers to filter functions that return an
+/// integer (1, 0, or -1) indicating how to handle the exception. For __finally
+/// blocks and other cleanups, the landing pad label is zero, and the filter
+/// function is actually a cleanup handler with the same prototype. A catch-all
+/// entry is modeled with a null filter function field and a non-zero landing
+/// pad label.
+///
+/// Possible filter function return values:
+/// EXCEPTION_EXECUTE_HANDLER (1):
+/// Jump to the landing pad label after cleanups.
+/// EXCEPTION_CONTINUE_SEARCH (0):
+/// Continue searching this table or continue unwinding.
+/// EXCEPTION_CONTINUE_EXECUTION (-1):
+/// Resume execution at the trapping PC.
+///
+/// Inferred table structure:
+/// struct Table {
+/// int NumEntries;
+/// struct Entry {
+/// imagerel32 LabelStart;
+/// imagerel32 LabelEnd;
+/// imagerel32 FilterOrFinally; // One means catch-all.
+/// imagerel32 LabelLPad; // Zero means __finally.
+/// } Entries[NumEntries];
+/// };
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.x86.seh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+
+ // Use the assembler to compute the number of table entries through label
+ // difference and division.
+ MCSymbol *TableBegin =
+ Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true);
+ MCSymbol *TableEnd =
+ Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true);
+ const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin);
+ const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx);
+ const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx);
+ AddComment("Number of call sites");
+ OS.EmitValue(EntryCount, 4);
+
+ OS.EmitLabel(TableBegin);
+
+ // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only
+ // models exceptions from invokes. LLVM also allows arbitrary reordering of
+ // the code, so our tables end up looking a bit different. Rather than
+ // trying to match MSVC's tables exactly, we emit a denormalized table. For
+ // each range of invokes in the same state, we emit table entries for all
+ // the actions that would be taken in that state. This means our tables are
+ // slightly bigger, which is OK.
+ const MCSymbol *LastStartLabel = nullptr;
+ int LastEHState = -1;
+ // Break out before we enter into a finally funclet.
+ // FIXME: We need to emit separate EH tables for cleanups.
+ MachineFunction::const_iterator End = MF->end();
+ MachineFunction::const_iterator Stop = std::next(MF->begin());
+ while (Stop != End && !Stop->isEHFuncletEntry())
+ ++Stop;
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) {
+ // Emit all the actions for the state we just transitioned out of
+ // if it was not the null state
+ if (LastEHState != -1)
+ emitSEHActionsForRange(FuncInfo, LastStartLabel,
+ StateChange.PreviousEndLabel, LastEHState);
+ LastStartLabel = StateChange.NewStartLabel;
+ LastEHState = StateChange.NewState;
+ }
+
+ OS.EmitLabel(TableEnd);
+}
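+
+// Conceptual sketch (hypothetical names; neither emitter nor runtime code) of
+// how __C_specific_handler consumes the table emitted above: walk the entries
+// in order, and for every entry whose try range covers the faulting IP,
+// evaluate its filter and act on the disposition values described before
+// emitCSpecificHandlerTable. The real entries hold imagerel32 values.
+struct SEHTableEntrySketch {
+  uint32_t LabelStart, LabelEnd; // try range (label + 1, as emitted above)
+  uint32_t FilterOrFinally;      // 1 means catch-all
+  uint32_t LabelLPad;            // 0 means __finally
+};
+
+static int scanSEHTableSketch(const SEHTableEntrySketch *Entries,
+                              uint32_t NumEntries, uint32_t FaultIP,
+                              int (*EvalFilter)(const SEHTableEntrySketch &)) {
+  for (uint32_t I = 0; I != NumEntries; ++I) {
+    if (FaultIP < Entries[I].LabelStart || FaultIP >= Entries[I].LabelEnd)
+      continue;
+    int Disposition = EvalFilter(Entries[I]); // 1, 0, or -1 (see above)
+    if (Disposition != 0)
+      return Disposition; // execute the handler (1) or resume execution (-1)
+  }
+  return 0; // EXCEPTION_CONTINUE_SEARCH: keep unwinding into outer frames
+}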
+
+void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State) {
+ auto &OS = *Asm->OutStreamer;
+ MCContext &Ctx = Asm->OutContext;
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ assert(BeginLabel && EndLabel);
+ while (State != -1) {
+ const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State];
+ const MCExpr *FilterOrFinally;
+ const MCExpr *ExceptOrNull;
+ auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ if (UME.IsFinally) {
+ FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler));
+ ExceptOrNull = MCConstantExpr::create(0, Ctx);
+ } else {
+ // For an except, the filter can be 1 (catch-all) or a function
+ // label.
+ FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter)
+ : MCConstantExpr::create(1, Ctx);
+ ExceptOrNull = create32bitRef(Handler->getSymbol());
+ }
+
+ AddComment("LabelStart");
+ OS.EmitValue(getLabelPlusOne(BeginLabel), 4);
+ AddComment("LabelEnd");
+ OS.EmitValue(getLabelPlusOne(EndLabel), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
+ : "CatchAll");
+ OS.EmitValue(FilterOrFinally, 4);
+ AddComment(UME.IsFinally ? "Null" : "ExceptionHandler");
+ OS.EmitValue(ExceptOrNull, 4);
+
+ assert(UME.ToState < State && "states should decrease");
+ State = UME.ToState;
+ }
+}
+
+void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
+ const Function *F = MF->getFunction();
+ auto &OS = *Asm->OutStreamer;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+ StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+
+ SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
+ MCSymbol *FuncInfoXData = nullptr;
+ if (shouldEmitPersonality) {
+ // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from
+ // IPs to state numbers.
+ FuncInfoXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName));
+ computeIP2StateTable(MF, FuncInfo, IPToStateTable);
+ } else {
+ FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName);
+ }
+
+ int UnwindHelpOffset = 0;
+ if (Asm->MAI->usesWindowsCFI())
+ UnwindHelpOffset =
+ getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
+
+ MCSymbol *UnwindMapXData = nullptr;
+ MCSymbol *TryBlockMapXData = nullptr;
+ MCSymbol *IPToStateXData = nullptr;
+ if (!FuncInfo.CxxUnwindMap.empty())
+ UnwindMapXData = Asm->OutContext.getOrCreateSymbol(
+ Twine("$stateUnwindMap$", FuncLinkageName));
+ if (!FuncInfo.TryBlockMap.empty())
+ TryBlockMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName));
+ if (!IPToStateTable.empty())
+ IPToStateXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName));
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ // FuncInfo {
+ // uint32_t MagicNumber
+ // int32_t MaxState;
+ // UnwindMapEntry *UnwindMap;
+ // uint32_t NumTryBlocks;
+ // TryBlockMapEntry *TryBlockMap;
+ // uint32_t IPMapEntries; // always 0 for x86
+ // IPToStateMapEntry *IPToStateMap; // always 0 for x86
+ // uint32_t UnwindHelp; // non-x86 only
+ // ESTypeList *ESTypeList;
+ // int32_t EHFlags;
+ // }
+ // EHFlags & 1 -> Synchronous exceptions only, no async exceptions.
+ // EHFlags & 2 -> ???
+ // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue.
+ OS.EmitValueToAlignment(4);
+ OS.EmitLabel(FuncInfoXData);
+
+ AddComment("MagicNumber");
+ OS.EmitIntValue(0x19930522, 4);
+
+ AddComment("MaxState");
+ OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4);
+
+ AddComment("UnwindMap");
+ OS.EmitValue(create32bitRef(UnwindMapXData), 4);
+
+ AddComment("NumTryBlocks");
+ OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4);
+
+ AddComment("TryBlockMap");
+ OS.EmitValue(create32bitRef(TryBlockMapXData), 4);
+
+ AddComment("IPMapEntries");
+ OS.EmitIntValue(IPToStateTable.size(), 4);
+
+ AddComment("IPToStateXData");
+ OS.EmitValue(create32bitRef(IPToStateXData), 4);
+
+ if (Asm->MAI->usesWindowsCFI()) {
+ AddComment("UnwindHelp");
+ OS.EmitIntValue(UnwindHelpOffset, 4);
+ }
+
+ AddComment("ESTypeList");
+ OS.EmitIntValue(0, 4);
+
+ AddComment("EHFlags");
+ OS.EmitIntValue(1, 4);
+
+ // UnwindMapEntry {
+ // int32_t ToState;
+ // void (*Action)();
+ // };
+ if (UnwindMapXData) {
+ OS.EmitLabel(UnwindMapXData);
+ for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
+ MCSymbol *CleanupSym =
+ getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>());
+ AddComment("ToState");
+ OS.EmitIntValue(UME.ToState, 4);
+
+ AddComment("Action");
+ OS.EmitValue(create32bitRef(CleanupSym), 4);
+ }
+ }
+
+ // TryBlockMap {
+ // int32_t TryLow;
+ // int32_t TryHigh;
+ // int32_t CatchHigh;
+ // int32_t NumCatches;
+ // HandlerType *HandlerArray;
+ // };
+ if (TryBlockMapXData) {
+ OS.EmitLabel(TryBlockMapXData);
+ SmallVector<MCSymbol *, 1> HandlerMaps;
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+
+ MCSymbol *HandlerMapXData = nullptr;
+ if (!TBME.HandlerArray.empty())
+ HandlerMapXData =
+ Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$")
+ .concat(Twine(I))
+ .concat("$")
+ .concat(FuncLinkageName));
+ HandlerMaps.push_back(HandlerMapXData);
+
+ // TBMEs should form intervals.
+ assert(0 <= TBME.TryLow && "bad trymap interval");
+ assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval");
+ assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval");
+ assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) &&
+ "bad trymap interval");
+
+ AddComment("TryLow");
+ OS.EmitIntValue(TBME.TryLow, 4);
+
+ AddComment("TryHigh");
+ OS.EmitIntValue(TBME.TryHigh, 4);
+
+ AddComment("CatchHigh");
+ OS.EmitIntValue(TBME.CatchHigh, 4);
+
+ AddComment("NumCatches");
+ OS.EmitIntValue(TBME.HandlerArray.size(), 4);
+
+ AddComment("HandlerArray");
+ OS.EmitValue(create32bitRef(HandlerMapXData), 4);
+ }
+
+ // All funclets use the same parent frame offset currently.
+ unsigned ParentFrameOffset = 0;
+ if (shouldEmitPersonality) {
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF);
+ }
+
+ for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) {
+ const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I];
+ MCSymbol *HandlerMapXData = HandlerMaps[I];
+ if (!HandlerMapXData)
+ continue;
+ // HandlerType {
+ // int32_t Adjectives;
+ // TypeDescriptor *Type;
+ // int32_t CatchObjOffset;
+ // void (*Handler)();
+ // int32_t ParentFrameOffset; // x64 only
+ // };
+ OS.EmitLabel(HandlerMapXData);
+ for (const WinEHHandlerType &HT : TBME.HandlerArray) {
+ // Get the frame escape label with the offset of the catch object. If
+ // the index is INT_MAX, then there is no catch object, and we should
+ // emit an offset of zero, indicating that no copy will occur.
+ const MCExpr *FrameAllocOffsetRef = nullptr;
+ if (HT.CatchObj.FrameIndex != INT_MAX) {
+ int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo);
+ assert(Offset != 0 && "Illegal offset for catch object!");
+ FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext);
+ } else {
+ FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
+ }
+
+ MCSymbol *HandlerSym =
+ getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>());
+
+ AddComment("Adjectives");
+ OS.EmitIntValue(HT.Adjectives, 4);
+
+ AddComment("Type");
+ OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4);
+
+ AddComment("CatchObjOffset");
+ OS.EmitValue(FrameAllocOffsetRef, 4);
+
+ AddComment("Handler");
+ OS.EmitValue(create32bitRef(HandlerSym), 4);
+
+ if (shouldEmitPersonality) {
+ AddComment("ParentFrameOffset");
+ OS.EmitIntValue(ParentFrameOffset, 4);
+ }
+ }
+ }
+ }
+
+ // IPToStateMapEntry {
+ // void *IP;
+ // int32_t State;
+ // };
+ if (IPToStateXData) {
+ OS.EmitLabel(IPToStateXData);
+ for (auto &IPStatePair : IPToStateTable) {
+ AddComment("IP");
+ OS.EmitValue(IPStatePair.first, 4);
+ AddComment("ToState");
+ OS.EmitIntValue(IPStatePair.second, 4);
+ }
+ }
+}
+
+void WinException::computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) {
+
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ // Find the end of the funclet
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ break;
+ }
+ }
+
+ // Don't emit ip2state entries for cleanup funclets. Any interesting
+ // exceptional actions in cleanups must be handled in a separate IR
+ // function.
+ if (FuncletStart->isCleanupFuncletEntry())
+ continue;
+
+ MCSymbol *StartLabel;
+ int BaseState;
+ if (FuncletStart == MF->begin()) {
+ BaseState = NullState;
+ StartLabel = Asm->getFunctionBegin();
+ } else {
+ auto *FuncletPad =
+ cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI());
+ assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0);
+ BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second;
+ StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart);
+ }
+ assert(StartLabel && "need local function start label");
+ IPToStateTable.push_back(
+ std::make_pair(create32bitRef(StartLabel), BaseState));
+
+ for (const auto &StateChange : InvokeStateChangeIterator::range(
+ FuncInfo, FuncletStart, FuncletEnd, BaseState)) {
+ // Compute the label to report as the start of this entry; use the EH
+ // start label for the invoke if we have one, otherwise (this is a call
+ // which may unwind to our caller and does not have an EH start label, so)
+ // use the previous end label.
+ const MCSymbol *ChangeLabel = StateChange.NewStartLabel;
+ if (!ChangeLabel)
+ ChangeLabel = StateChange.PreviousEndLabel;
+ // Emit an entry indicating that PCs after 'Label' have this EH state.
+ IPToStateTable.push_back(
+ std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState));
+ // FIXME: assert that NewState is between CatchLow and CatchHigh.
+ }
+ }
+}
+
+void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
+ StringRef FLinkageName) {
+ // Outlined helpers called by the EH runtime need to know the offset of the EH
+  // registration in order to recover the parent frame pointer. Now that we
+  // have code-generated the parent, we can emit the label assignment that
+ // those helpers use to get the offset of the registration node.
+ MCContext &Ctx = Asm->OutContext;
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ int64_t Offset = TFI->getFrameIndexReference(
+ *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg);
+ const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+}
+
+/// Emit the language-specific data that _except_handler3 and 4 expect. This is
+/// functionally equivalent to the __C_specific_handler table, except it is
+/// indexed by state number instead of IP.
+void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
+ MCStreamer &OS = *Asm->OutStreamer;
+ const Function *F = MF->getFunction();
+ StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
+
+ bool VerboseAsm = OS.isVerboseAsm();
+ auto AddComment = [&](const Twine &Comment) {
+ if (VerboseAsm)
+ OS.AddComment(Comment);
+ };
+
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
+
+ // Emit the __ehtable label that we use for llvm.x86.seh.lsda.
+ MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName);
+ OS.EmitValueToAlignment(4);
+ OS.EmitLabel(LSDALabel);
+
+ const Function *Per =
+ dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ StringRef PerName = Per->getName();
+ int BaseState = -1;
+ if (PerName == "_except_handler4") {
+ // The LSDA for _except_handler4 starts with this struct, followed by the
+ // scope table:
+ //
+ // struct EH4ScopeTable {
+ // int32_t GSCookieOffset;
+ // int32_t GSCookieXOROffset;
+ // int32_t EHCookieOffset;
+ // int32_t EHCookieXOROffset;
+ // ScopeTableEntry ScopeRecord[];
+ // };
+ //
+ // Offsets are %ebp relative.
+ //
+ // The GS cookie is present only if the function needs stack protection.
+ // GSCookieOffset = -2 means that GS cookie is not used.
+ //
+ // The EH cookie is always present.
+ //
+ // Check is done the following way:
+ // (ebp+CookieXOROffset) ^ [ebp+CookieOffset] == _security_cookie
+
+ // Retrieve the Guard Stack slot.
+ int GSCookieOffset = -2;
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->hasStackProtectorIndex()) {
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int SSPIdx = MFI->getStackProtectorIndex();
+ GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
+ }
+
+ // Retrieve the EH Guard slot.
+    // TODO(etienneb): Get rid of this value and change it for an assertion.
+ int EHCookieOffset = 9999;
+ if (FuncInfo.EHGuardFrameIndex != INT_MAX) {
+ unsigned UnusedReg;
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int EHGuardIdx = FuncInfo.EHGuardFrameIndex;
+ EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg);
+ }
+
+ AddComment("GSCookieOffset");
+ OS.EmitIntValue(GSCookieOffset, 4);
+ AddComment("GSCookieXOROffset");
+ OS.EmitIntValue(0, 4);
+ AddComment("EHCookieOffset");
+ OS.EmitIntValue(EHCookieOffset, 4);
+ AddComment("EHCookieXOROffset");
+ OS.EmitIntValue(0, 4);
+ BaseState = -2;
+ }
+
+ assert(!FuncInfo.SEHUnwindMap.empty());
+ for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) {
+ auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ const MCSymbol *ExceptOrFinally =
+ UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol();
+ // -1 is usually the base state for "unwind to caller", but for
+ // _except_handler4 it's -2. Do that replacement here if necessary.
+ int ToState = UME.ToState == -1 ? BaseState : UME.ToState;
+ AddComment("ToState");
+ OS.EmitIntValue(ToState, 4);
+ AddComment(UME.IsFinally ? "Null" : "FilterFunction");
+ OS.EmitValue(create32bitRef(UME.Filter), 4);
+ AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler");
+ OS.EmitValue(create32bitRef(ExceptOrFinally), 4);
+ }
+}
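+
+// Sketch of the cookie check described in the EH4ScopeTable comment above.
+// The check is performed by the CRT's _except_handler4, not by LLVM; the
+// function and parameter names here are illustrative only. The EH cookie
+// check is analogous, using EHCookieOffset/EHCookieXOROffset.
+static bool eh4CookiesLookValid(uintptr_t Ebp, int32_t GSCookieOffset,
+                                int32_t GSCookieXOROffset,
+                                uintptr_t SecurityCookie) {
+  if (GSCookieOffset == -2)
+    return true; // the GS cookie is not used by this frame
+  uintptr_t Stored = *reinterpret_cast<const uintptr_t *>(Ebp + GSCookieOffset);
+  return ((Ebp + GSCookieXOROffset) ^ Stored) == SecurityCookie;
+}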
+
+static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) {
+ int Rank = 0;
+ while (State != -1) {
+ ++Rank;
+ State = FuncInfo.ClrEHUnwindMap[State].TryParentState;
+ }
+ return Rank;
+}
+
+static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) {
+ int LeftRank = getTryRank(FuncInfo, Left);
+ int RightRank = getTryRank(FuncInfo, Right);
+
+ while (LeftRank < RightRank) {
+ Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
+ --RightRank;
+ }
+
+ while (RightRank < LeftRank) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
+ --LeftRank;
+ }
+
+ while (Left != Right) {
+ Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState;
+ Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState;
+ }
+
+ return Left;
+}
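+
+// Worked example (standalone sketch; hypothetical helper): getTryRank and
+// getTryAncestor above amount to a lowest-common-ancestor walk over the
+// TryParentState links. With parent links {0: -1, 1: -1, 2: 0, 3: 1}, the
+// ancestor of states 2 and 3 is -1 (no shared enclosing try), while the
+// ancestor of states 2 and 0 is 0. The same algorithm over a plain parent
+// array:
+static int tryAncestorSketch(const SmallVectorImpl<int> &Parent, int Left,
+                             int Right) {
+  auto Rank = [&](int S) {
+    int R = 0;
+    for (; S != -1; S = Parent[S])
+      ++R;
+    return R;
+  };
+  int LeftRank = Rank(Left), RightRank = Rank(Right);
+  for (; LeftRank < RightRank; --RightRank)
+    Right = Parent[Right];
+  for (; RightRank < LeftRank; --LeftRank)
+    Left = Parent[Left];
+  while (Left != Right) {
+    Left = Parent[Left];
+    Right = Parent[Right];
+  }
+  return Left;
+}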
+
+void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
+ // CLR EH "states" are really just IDs that identify handlers/funclets;
+ // states, handlers, and funclets all have 1:1 mappings between them, and a
+ // handler/funclet's "state" is its index in the ClrEHUnwindMap.
+ MCStreamer &OS = *Asm->OutStreamer;
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ MCSymbol *FuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *FuncEndSym = Asm->getFunctionEnd();
+
+ // A ClrClause describes a protected region.
+ struct ClrClause {
+ const MCSymbol *StartLabel; // Start of protected region
+ const MCSymbol *EndLabel; // End of protected region
+ int State; // Index of handler protecting the protected region
+ int EnclosingState; // Index of funclet enclosing the protected region
+ };
+ SmallVector<ClrClause, 8> Clauses;
+
+ // Build a map from handler MBBs to their corresponding states (i.e. their
+ // indices in the ClrEHUnwindMap).
+ int NumStates = FuncInfo.ClrEHUnwindMap.size();
+ assert(NumStates > 0 && "Don't need exception table!");
+ DenseMap<const MachineBasicBlock *, int> HandlerStates;
+ for (int State = 0; State < NumStates; ++State) {
+ MachineBasicBlock *HandlerBlock =
+ FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
+ HandlerStates[HandlerBlock] = State;
+ // Use this loop through all handlers to verify our assumption (used in
+ // the MinEnclosingState computation) that enclosing funclets have lower
+ // state numbers than their enclosed funclets.
+ assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State &&
+ "ill-formed state numbering");
+ }
+ // Map the main function to the NullState.
+ HandlerStates[&MF->front()] = NullState;
+
+ // Write out a sentinel indicating the end of the standard (Windows) xdata
+ // and the start of the additional (CLR) info.
+ OS.EmitIntValue(0xffffffff, 4);
+ // Write out the number of funclets
+ OS.EmitIntValue(NumStates, 4);
+
+ // Walk the machine blocks/instrs, computing and emitting a few things:
+ // 1. Emit a list of the offsets to each handler entry, in lexical order.
+ // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end.
+ // 3. Compute the list of ClrClauses, in the required order (inner before
+ // outer, earlier before later; the order by which a forward scan with
+ // early termination will find the innermost enclosing clause covering
+ // a given address).
+ // 4. A map (MinClauseMap) from each handler index to the index of the
+ // outermost funclet/function which contains a try clause targeting the
+ // key handler. This will be used to determine IsDuplicate-ness when
+ // emitting ClrClauses. The NullState value is used to indicate that the
+ // top-level function contains a try clause targeting the key handler.
+ // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for
+ // try regions we entered before entering the PendingState try but which
+ // we haven't yet exited.
+ SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack;
+ // EndSymbolMap and MinClauseMap are maps described above.
+ std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]);
+ SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates);
+
+ // Visit the root function and each funclet.
+ for (MachineFunction::const_iterator FuncletStart = MF->begin(),
+ FuncletEnd = MF->begin(),
+ End = MF->end();
+ FuncletStart != End; FuncletStart = FuncletEnd) {
+ int FuncletState = HandlerStates[&*FuncletStart];
+ // Find the end of the funclet
+ MCSymbol *EndSymbol = FuncEndSym;
+ while (++FuncletEnd != End) {
+ if (FuncletEnd->isEHFuncletEntry()) {
+ EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd);
+ break;
+ }
+ }
+ // Emit the function/funclet end and, if this is a funclet (and not the
+ // root function), record it in the EndSymbolMap.
+ OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4);
+ if (FuncletState != NullState) {
+ // Record the end of the handler.
+ EndSymbolMap[FuncletState] = EndSymbol;
+ }
+
+ // Walk the state changes in this function/funclet and compute its clauses.
+ // Funclets always start in the null state.
+ const MCSymbol *CurrentStartLabel = nullptr;
+ int CurrentState = NullState;
+ assert(HandlerStack.empty());
+ for (const auto &StateChange :
+ InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) {
+ // Close any try regions we're not still under
+ int StillPendingState =
+ getTryAncestor(FuncInfo, CurrentState, StateChange.NewState);
+ while (CurrentState != StillPendingState) {
+ assert(CurrentState != NullState &&
+ "Failed to find still-pending state!");
+ // Close the pending clause
+ Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel,
+ CurrentState, FuncletState});
+ // Now the next-outer try region is current
+ CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState;
+ // Pop the new start label from the handler stack if we've exited all
+ // inner try regions of the corresponding try region.
+ if (HandlerStack.back().second == CurrentState)
+ CurrentStartLabel = HandlerStack.pop_back_val().first;
+ }
+
+ if (StateChange.NewState != CurrentState) {
+ // For each clause we're starting, update the MinClauseMap so we can
+ // know which is the topmost funclet containing a clause targeting
+ // it.
+ for (int EnteredState = StateChange.NewState;
+ EnteredState != CurrentState;
+ EnteredState =
+ FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) {
+ int &MinEnclosingState = MinClauseMap[EnteredState];
+ if (FuncletState < MinEnclosingState)
+ MinEnclosingState = FuncletState;
+ }
+ // Save the previous current start/label on the stack and update to
+ // the newly-current start/state.
+ HandlerStack.emplace_back(CurrentStartLabel, CurrentState);
+ CurrentStartLabel = StateChange.NewStartLabel;
+ CurrentState = StateChange.NewState;
+ }
+ }
+ assert(HandlerStack.empty());
+ }
+
+ // Now emit the clause info, starting with the number of clauses.
+ OS.EmitIntValue(Clauses.size(), 4);
+ for (ClrClause &Clause : Clauses) {
+ // Emit a CORINFO_EH_CLAUSE :
+ /*
+ struct CORINFO_EH_CLAUSE
+ {
+ CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag
+ DWORD TryOffset;
+ DWORD TryLength; // actually TryEndOffset
+ DWORD HandlerOffset;
+ DWORD HandlerLength; // actually HandlerEndOffset
+ union
+ {
+ DWORD ClassToken; // use for catch clauses
+ DWORD FilterOffset; // use for filter clauses
+ };
+ };
+
+ enum CORINFO_EH_CLAUSE_FLAGS
+ {
+ CORINFO_EH_CLAUSE_NONE = 0,
+ CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter
+ CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause
+ CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause
+ };
+ typedef enum CorExceptionFlag
+ {
+ COR_ILEXCEPTION_CLAUSE_NONE,
+ COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause
+ COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause
+ COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause
+ COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This
+ // clause was duplicated
+ // to a funclet which was
+ // pulled out of line
+ } CorExceptionFlag;
+ */
+ // Add 1 to the start/end of the EH clause; the IP associated with a
+ // call when the runtime does its scan is the IP of the next instruction
+ // (the one to which control will return after the call), so we need
+ // to add 1 to the end of the clause to cover that offset. We also add
+ // 1 to the start of the clause to make sure that the ranges reported
+ // for all clauses are disjoint. Note that we'll need some additional
+ // logic when machine traps are supported, since in that case the IP
+ // that the runtime uses is the offset of the faulting instruction
+ // itself; if such an instruction immediately follows a call but the
+ // two belong to different clauses, we'll need to insert a nop between
+ // them so the runtime can distinguish the point to which the call will
+ // return from the point at which the fault occurs.
+
+ const MCExpr *ClauseBegin =
+ getOffsetPlusOne(Clause.StartLabel, FuncBeginSym);
+ const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym);
+
+ const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State];
+ MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>();
+ MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock);
+ const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym);
+ MCSymbol *EndSym = EndSymbolMap[Clause.State];
+ const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym);
+
+ uint32_t Flags = 0;
+ switch (Entry.HandlerType) {
+ case ClrHandlerType::Catch:
+ // Leaving bits 0-2 clear indicates catch.
+ break;
+ case ClrHandlerType::Filter:
+ Flags |= 1;
+ break;
+ case ClrHandlerType::Finally:
+ Flags |= 2;
+ break;
+ case ClrHandlerType::Fault:
+ Flags |= 4;
+ break;
+ }
+ if (Clause.EnclosingState != MinClauseMap[Clause.State]) {
+ // This is a "duplicate" clause; the handler needs to be entered from a
+ // frame above the one holding the invoke.
+ assert(Clause.EnclosingState > MinClauseMap[Clause.State]);
+ Flags |= 8;
+ }
+ OS.EmitIntValue(Flags, 4);
+
+ // Write the clause start/end
+ OS.EmitValue(ClauseBegin, 4);
+ OS.EmitValue(ClauseEnd, 4);
+
+ // Write out the handler start/end
+ OS.EmitValue(HandlerBegin, 4);
+ OS.EmitValue(HandlerEnd, 4);
+
+ // Write out the type token or filter offset
+ assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters");
+ OS.EmitIntValue(Entry.TypeToken, 4);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
new file mode 100644
index 000000000000..acb301016910
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -0,0 +1,106 @@
+//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing Windows exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H
+
+#include "EHStreamer.h"
+
+namespace llvm {
+class Function;
+class GlobalValue;
+class MachineFunction;
+class MCExpr;
+class Value;
+struct WinEHFuncInfo;
+
+class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
+ /// Per-function flag to indicate if personality info should be emitted.
+ bool shouldEmitPersonality = false;
+
+ /// Per-function flag to indicate if the LSDA should be emitted.
+ bool shouldEmitLSDA = false;
+
+ /// Per-function flag to indicate if frame moves info should be emitted.
+ bool shouldEmitMoves = false;
+
+ /// True if this is a 64-bit target and we should use image relative offsets.
+ bool useImageRel32 = false;
+
+ /// Pointer to the current funclet entry BB.
+ const MachineBasicBlock *CurrentFuncletEntry = nullptr;
+
+ void emitCSpecificHandlerTable(const MachineFunction *MF);
+
+ void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
+ const MCSymbol *BeginLabel,
+ const MCSymbol *EndLabel, int State);
+
+ /// Emit the EH table data for 32-bit and 64-bit functions using
+ /// the __CxxFrameHandler3 personality.
+ void emitCXXFrameHandler3Table(const MachineFunction *MF);
+
+ /// Emit the EH table data for _except_handler3 and _except_handler4
+ /// personality functions. These are only used on 32-bit and do not use CFI
+ /// tables.
+ void emitExceptHandlerTable(const MachineFunction *MF);
+
+ void emitCLRExceptionTable(const MachineFunction *MF);
+
+ void computeIP2StateTable(
+ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo,
+ SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable);
+
+ /// Emits the label used with llvm.x86.seh.recoverfp, which is used by
+ /// outlined funclets.
+ void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
+ StringRef FLinkageName);
+
+ const MCExpr *create32bitRef(const MCSymbol *Value);
+ const MCExpr *create32bitRef(const GlobalValue *GV);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+ const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
+ const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
+ const MCSymbol *OffsetFrom);
+
+ /// Gets the offset that we should use in a table for a stack object with the
+ /// given index. For targets using CFI (Win64, etc), this is relative to the
+ /// established SP at the end of the prologue. For targets without CFI (Win32
+ /// only), it is relative to the frame pointer.
+ int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
+
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ WinException(AsmPrinter *A);
+ ~WinException() override;
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ /// \brief Emit target-specific EH funclet machinery.
+ void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
+ void endFunclet() override;
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
new file mode 100644
index 000000000000..bf5cf105a8f8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -0,0 +1,1640 @@
+//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions with
+// __atomic_* library calls, or target-specific instructions which implement
+// the same semantics in a way that better fits the target backend. This can
+// include the use of (intrinsic-based) load-linked/store-conditional loops,
+// AtomicCmpXchg, or type coercions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/AtomicExpandUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "atomic-expand"
+
+namespace {
+ class AtomicExpand: public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLowering *TLI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit AtomicExpand(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM), TLI(nullptr) {
+ initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad);
+ IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+ LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
+ bool expandAtomicStore(StoreInst *SI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+ Value *
+ insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void expandPartwordAtomicRMW(
+ AtomicRMWInst *I,
+ TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+
+ AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
+ static Value *insertRMWCmpXchgLoop(
+ IRBuilder<> &Builder, Type *ResultType, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg);
+
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *AI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *AI);
+
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
+ Value *PointerOperand, Value *ValueOperand,
+ Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2,
+ ArrayRef<RTLIB::Libcall> Libcalls);
+ void expandAtomicLoadToLibcall(LoadInst *LI);
+ void expandAtomicStoreToLibcall(StoreInst *LI);
+ void expandAtomicRMWToLibcall(AtomicRMWInst *I);
+ void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
+
+ friend bool
+ llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg);
+ };
+}
+
+char AtomicExpand::ID = 0;
+char &llvm::AtomicExpandID = AtomicExpand::ID;
+INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
+ false, false)
+
+FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
+ return new AtomicExpand(TM);
+}
+
+namespace {
+// Helper functions to retrieve the size of atomic instructions.
+unsigned getAtomicOpSize(LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(LI->getType());
+}
+
+unsigned getAtomicOpSize(StoreInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(SI->getValueOperand()->getType());
+}
+
+unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
+ const DataLayout &DL = RMWI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
+}
+
+unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
+ const DataLayout &DL = CASI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
+}
+
+// Helper functions to retrieve the alignment of atomic instructions.
+unsigned getAtomicOpAlign(LoadInst *LI) {
+ unsigned Align = LI->getAlignment();
+ // In the future, if this IR restriction is relaxed, we should
+ // return DataLayout::getABITypeAlignment when there's no align
+ // value.
+ assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
+ return Align;
+}
+
+unsigned getAtomicOpAlign(StoreInst *SI) {
+ unsigned Align = SI->getAlignment();
+ // In the future, if this IR restriction is relaxed, we should
+ // return DataLayout::getABITypeAlignment when there's no align
+ // value.
+ assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
+ return Align;
+}
+
+unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
+ // TODO(PR27168): This instruction has no alignment attribute, but unlike the
+ // default alignment for load/store, the default here is to assume
+ // it has NATURAL alignment, not DataLayout-specified alignment.
+ const DataLayout &DL = RMWI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
+}
+
+unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
+ // TODO(PR27168): same comment as above.
+ const DataLayout &DL = CASI->getModule()->getDataLayout();
+ return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
+}
+
+// Determine if a particular atomic operation has a supported size,
+// and is of appropriate alignment, to be passed through for target
+// lowering (as opposed to being turned into a __atomic* libcall).
+template <typename Inst>
+bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+ return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+}
+
+} // end anonymous namespace
+
+bool AtomicExpand::runOnFunction(Function &F) {
+ if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
+ return false;
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
+ Instruction *I = &*II;
+ if (I->isAtomic() && !isa<FenceInst>(I))
+ AtomicInsts.push_back(I);
+ }
+
+ bool MadeChange = false;
+ for (auto I : AtomicInsts) {
+ auto LI = dyn_cast<LoadInst>(I);
+ auto SI = dyn_cast<StoreInst>(I);
+ auto RMWI = dyn_cast<AtomicRMWInst>(I);
+ auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
+ assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
+
+ // If the Size/Alignment is not supported, replace with a libcall.
+ if (LI) {
+ if (!atomicSizeSupported(TLI, LI)) {
+ expandAtomicLoadToLibcall(LI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (SI) {
+ if (!atomicSizeSupported(TLI, SI)) {
+ expandAtomicStoreToLibcall(SI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (RMWI) {
+ if (!atomicSizeSupported(TLI, RMWI)) {
+ expandAtomicRMWToLibcall(RMWI);
+ MadeChange = true;
+ continue;
+ }
+ } else if (CASI) {
+ if (!atomicSizeSupported(TLI, CASI)) {
+ expandAtomicCASToLibcall(CASI);
+ MadeChange = true;
+ continue;
+ }
+ }
+
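+ // If the target wants explicit fences, downgrade the instruction's own
+ // ordering to monotonic and remember the original ordering so the
+ // instruction can be bracketed with fences below.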
+ if (TLI->shouldInsertFencesForAtomic(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ bool IsStore, IsLoad;
+ if (LI && isAcquireOrStronger(LI->getOrdering())) {
+ FenceOrdering = LI->getOrdering();
+ LI->setOrdering(AtomicOrdering::Monotonic);
+ IsStore = false;
+ IsLoad = true;
+ } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
+ FenceOrdering = SI->getOrdering();
+ SI->setOrdering(AtomicOrdering::Monotonic);
+ IsStore = true;
+ IsLoad = false;
+ } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
+ isAcquireOrStronger(RMWI->getOrdering()))) {
+ FenceOrdering = RMWI->getOrdering();
+ RMWI->setOrdering(AtomicOrdering::Monotonic);
+ IsStore = IsLoad = true;
+ } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getSuccessOrdering()))) {
+ // If a compare and swap is lowered to LL/SC, we can do smarter fence
+ // insertion, with a stronger one on the success path than on the
+ // failure path. As a result, fence insertion is directly done by
+ // expandAtomicCmpXchg in that case.
+ FenceOrdering = CASI->getSuccessOrdering();
+ CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
+ CASI->setFailureOrdering(AtomicOrdering::Monotonic);
+ IsStore = IsLoad = true;
+ }
+
+ if (FenceOrdering != AtomicOrdering::Monotonic) {
+ MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+ }
+ }
+
+ if (LI) {
+ if (LI->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ LI = convertAtomicLoadToIntegerType(LI);
+ assert(LI->getType()->isIntegerTy() && "invariant broken");
+ MadeChange = true;
+ }
+
+ MadeChange |= tryExpandAtomicLoad(LI);
+ } else if (SI) {
+ if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ SI = convertAtomicStoreToIntegerType(SI);
+ assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
+
+ if (TLI->shouldExpandAtomicStoreInIR(SI))
+ MadeChange |= expandAtomicStore(SI);
+ } else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
+ } else if (CASI) {
+ // TODO: when we're ready to make the change at the IR level, we can
+ // extend convertCmpXchgToIntegerType for floating point too.
+ assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
+ "unimplemented - floating point not legal at IR level");
+ if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ CASI = convertCmpXchgToIntegerType(CASI);
+ assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
+
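+ // A cmpxchg narrower than the target's minimum cmpxchg width must be
+ // widened to a full-word, masked cmpxchg loop (see expandPartwordCmpXchg).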
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(CASI);
+ if (ValueSize < MinCASSize) {
+ assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+ expandPartwordCmpXchg(CASI);
+ } else {
+ if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
+ MadeChange |= expandAtomicCmpXchg(CASI);
+ }
+ }
+ }
+ return MadeChange;
+}
+
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad) {
+ IRBuilder<> Builder(I);
+
+ auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
+
+ auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
+ // The trailing fence is emitted before the instruction instead of after
+ // because there is no easy way of setting Builder insertion point after
+ // an instruction. So we must erase it from the BB, and insert it back
+ // in the right place.
+ // We have a guard here because not every atomic operation generates a
+ // trailing fence.
+ if (TrailingFence) {
+ TrailingFence->removeFromParent();
+ TrailingFence->insertAfter(I);
+ }
+
+ return (LeadingFence || TrailingFence);
+}
+
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+ const DataLayout &DL) {
+ EVT VT = TLI->getValueType(DL, T);
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+ return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+ auto *M = LI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+ M->getDataLayout());
+
+ IRBuilder<> Builder(LI);
+
+ Value *Addr = LI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ auto *NewLI = Builder.CreateLoad(NewAddr);
+ NewLI->setAlignment(LI->getAlignment());
+ NewLI->setVolatile(LI->isVolatile());
+ NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+ DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+ Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+ LI->replaceAllUsesWith(NewVal);
+ LI->eraseFromParent();
+ return NewLI;
+}
+
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+ switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ expandAtomicOpToLLSC(
+ LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
+ [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+ return true;
+ case TargetLoweringBase::AtomicExpansionKind::LLOnly:
+ return expandAtomicLoadToLL(LI);
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+ return expandAtomicLoadToCmpXchg(LI);
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
+ IRBuilder<> Builder(LI);
+
+ // On some architectures, load-linked instructions are atomic for larger
+ // sizes than normal loads. For example, the only 64-bit load guaranteed
+ // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
+ Value *Val =
+ TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+
+ LI->replaceAllUsesWith(Val);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+ IRBuilder<> Builder(LI);
+ AtomicOrdering Order = LI->getOrdering();
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
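+ // A cmpxchg with expected == new == 0 either stores back the zero it found
+ // or fails without writing; either way the memory contents are unchanged and
+ // the returned loaded value is the current contents, which is all we need.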
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all. The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of an atomic
+/// float or vector store. The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+ IRBuilder<> Builder(SI);
+ auto *M = SI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+ M->getDataLayout());
+ Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+
+ Value *Addr = SI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+ NewSI->setAlignment(SI->getAlignment());
+ NewSI->setVolatile(SI->isVolatile());
+ NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+ DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+ SI->eraseFromParent();
+ return NewSI;
+}
+
+bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
+ // This function is only called on atomic stores that are too large to be
+ // atomic if implemented as a native store. So we replace them with an
+ // atomic swap, which can be implemented for example as a ldrex/strex on ARM
+ // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
+ // It is the responsibility of the target to only signal expansion via
+ // shouldExpandAtomicRMW in cases where this is required and possible.
+ IRBuilder<> Builder(SI);
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), SI->getOrdering());
+ SI->eraseFromParent();
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ return tryExpandAtomicRMW(AI);
+}
+
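+// A CreateCmpXchgInstFun that emits an ordinary cmpxchg and unpacks the
+// resulting { value, success } pair; used by the atomicrmw expansions in
+// this file.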
+static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal,
+ AtomicOrdering MemOpOrder,
+ Value *&Success, Value *&NewLoaded) {
+ Value* Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+ Value *Loaded, Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ llvm_unreachable(
+ "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+ } else {
+ auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performAtomicOp(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
+ };
+ expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
+ AI->getOrdering(), PerformOp);
+ }
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+ unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+ unsigned ValueSize = getAtomicOpSize(AI);
+ if (ValueSize < MinCASSize) {
+ expandPartwordAtomicRMW(AI,
+ TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+ } else {
+ expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ }
+ return true;
+ }
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+ }
+}
+
+namespace {
+
+/// Result values from createMaskInstrs helper.
+struct PartwordMaskValues {
+ Type *WordType;
+ Type *ValueType;
+ Value *AlignedAddr;
+ Value *ShiftAmt;
+ Value *Mask;
+ Value *Inv_Mask;
+};
+} // end anonymous namespace
+
+/// This is a helper function which builds instructions to provide
+/// values necessary for partword atomic operations. It takes an
+/// incoming address, Addr, and ValueType, and constructs the address,
+/// shift-amounts and masks needed to work with a larger value of size
+/// WordSize.
+///
+/// AlignedAddr: Addr rounded down to a multiple of WordSize
+///
+/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
+/// from AlignedAddr for it to have the same value as if
+/// ValueType was loaded from Addr.
+///
+/// Mask: Value to mask with the value loaded from AlignedAddr to
+/// include only the part that would've been loaded from Addr.
+///
+/// Inv_Mask: The inverse of Mask.
+
+static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
+ Type *ValueType, Value *Addr,
+ unsigned WordSize) {
+ PartwordMaskValues Ret;
+
+ BasicBlock *BB = I->getParent();
+ Function *F = BB->getParent();
+ Module *M = I->getModule();
+
+ LLVMContext &Ctx = F->getContext();
+ const DataLayout &DL = M->getDataLayout();
+
+ unsigned ValueSize = DL.getTypeStoreSize(ValueType);
+
+ assert(ValueSize < WordSize);
+
+ Ret.ValueType = ValueType;
+ Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+
+ Type *WordPtrType =
+ Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+
+ Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
+ Ret.AlignedAddr = Builder.CreateIntToPtr(
+ Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+ "AlignedAddr");
+
+ Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+ if (DL.isLittleEndian()) {
+ // turn bytes into bits
+ Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+ } else {
+ // turn bytes into bits, and count from the other side.
+ Ret.ShiftAmt =
+ Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+ }
+
+ Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
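+ // Mask has ValueSize bytes of ones shifted into the position of the value
+ // within the word; Inv_Mask selects everything else.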
+ Ret.Mask = Builder.CreateShl(
+ ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
+ "Mask");
+ Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+
+ return Ret;
+}
+
+/// Emit IR to implement a masked version of a given atomicrmw
+/// operation. (That is, only the bits under the Mask should be
+/// affected by the operation)
+static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
+ IRBuilder<> &Builder, Value *Loaded,
+ Value *Shifted_Inc, Value *Inc,
+ const PartwordMaskValues &PMV) {
+ switch (Op) {
+ case AtomicRMWInst::Xchg: {
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ // Or/Xor won't affect any other bits, so can just be done
+ // directly.
+ return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Nand: {
+ // The other arithmetic ops need to be masked into place.
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin: {
+ // Finally, comparison ops will operate on the full value, so
+ // truncate down to the original size, and expand out again after
+ // doing the operation.
+ Value *Loaded_Shiftdown = Builder.CreateTrunc(
+ Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
+ Value *NewVal_Shiftup = Builder.CreateShl(
+ Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+ return FinalVal;
+ }
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+/// Expand a sub-word atomicrmw operation into an appropriate
+/// word-sized operation.
+///
+/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
+/// way as a typical atomicrmw expansion. The only difference here is
+/// that the operation inside of the loop must operate only upon a
+/// part of the value.
+void AtomicExpand::expandPartwordAtomicRMW(
+ AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
+
+ assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+
+ AtomicOrdering MemOpOrder = AI->getOrdering();
+
+ IRBuilder<> Builder(AI);
+
+ PartwordMaskValues PMV =
+ createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+ TLI->getMinCmpXchgSizeInBits() / 8);
+
+ Value *ValOperand_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+ PMV.ShiftAmt, "ValOperand_Shifted");
+
+ auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
+ ValOperand_Shifted, AI->getValOperand(), PMV);
+ };
+
+ // TODO: When we're ready to support LLSC conversions too, use
+ // insertRMWLLSCLoop here for ExpansionKind==LLSC.
+ Value *OldResult =
+ insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+ PerformPartwordOp, createCmpXchgInstFun);
+ Value *FinalOldResult = Builder.CreateTrunc(
+ Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+ AI->replaceAllUsesWith(FinalOldResult);
+ AI->eraseFromParent();
+}
+
+void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+ // The basic idea here is that we're expanding a cmpxchg of a
+ // smaller memory size up to a word-sized cmpxchg. To do this, we
+ // need to add a retry-loop for strong cmpxchg, so that
+ // modifications to other parts of the word don't cause a spurious
+ // failure.
+
+ // This generates code like the following:
+ // [[Setup mask values PMV.*]]
+ // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
+ // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
+ // %InitLoaded = load i32* %addr
+ // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
+ // br partword.cmpxchg.loop
+ // partword.cmpxchg.loop:
+ // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
+ // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
+ // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
+ // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
+ // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
+ // i32 %FullWord_NewVal success_ordering failure_ordering
+ // %OldVal = extractvalue { i32, i1 } %NewCI, 0
+ // %Success = extractvalue { i32, i1 } %NewCI, 1
+ // br i1 %Success, label %partword.cmpxchg.end,
+ // label %partword.cmpxchg.failure
+ // partword.cmpxchg.failure:
+ // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
+ // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
+ // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
+ // label %partword.cmpxchg.end
+ // partword.cmpxchg.end:
+ // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
+ // %FinalOldVal = trunc i32 %tmp1 to i8
+ // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
+ // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
+
+ Value *Addr = CI->getPointerOperand();
+ Value *Cmp = CI->getCompareOperand();
+ Value *NewVal = CI->getNewValOperand();
+
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ IRBuilder<> Builder(CI);
+ LLVMContext &Ctx = Builder.getContext();
+
+ const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
+
+ BasicBlock *EndBB =
+ BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
+ auto FailureBB =
+ BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
+
+ // The split call above "helpfully" added a branch at the end of BB
+ // (to the wrong place).
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
+
+ // Shift the incoming values over, into the right location in the word.
+ Value *NewVal_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Cmp_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
+
+ // Load the entire current word, and mask into place the expected and new
+ // values
+ LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
+ InitLoaded->setVolatile(CI->isVolatile());
+ Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
+ Builder.CreateBr(LoopBB);
+
+ // partword.cmpxchg.loop:
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
+ Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
+
+ // Mask/Or the expected and new values into place in the loaded word.
+ Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
+ Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
+ AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
+ PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
+ CI->getFailureOrdering(), CI->getSynchScope());
+ NewCI->setVolatile(CI->isVolatile());
+ // When we're building a strong cmpxchg, we need a retry loop, so it might
+ // seem tempting to use a weak cmpxchg inside it. Using a strong one instead
+ // allows the ShouldContinue comparison below, and the underlying cmpxchg is
+ // expected to be a machine instruction, which is strong anyway.
+ NewCI->setWeak(CI->isWeak());
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Success = Builder.CreateExtractValue(NewCI, 1);
+
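+ // A weak cmpxchg may fail spuriously, so no retry loop is needed: fall
+ // straight through to the end block whether or not the exchange succeeded.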
+ if (CI->isWeak())
+ Builder.CreateBr(EndBB);
+ else
+ Builder.CreateCondBr(Success, EndBB, FailureBB);
+
+ // partword.cmpxchg.failure:
+ Builder.SetInsertPoint(FailureBB);
+ // Upon failure, check whether the masked-out part of the loaded value was
+ // modified. If it was not, the masked-in part must have caused the failure,
+ // so give up; otherwise retry with the freshly observed masked-out bits.
+ Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
+ Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
+ Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
+
+ // Add the second value to the phi from above
+ Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
+
+ // partword.cmpxchg.end:
+ Builder.SetInsertPoint(CI);
+
+ Value *FinalOldVal = Builder.CreateTrunc(
+ Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+}
+
+void AtomicExpand::expandAtomicOpToLLSC(
+ Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ IRBuilder<> Builder(I);
+ Value *Loaded =
+ insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
+
+ I->replaceAllUsesWith(Loaded);
+ I->eraseFromParent();
+}
+
+Value *AtomicExpand::insertRMWLLSCLoop(
+ IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ LLVMContext &Ctx = Builder.getContext();
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Function *F = BB->getParent();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // atomicrmw.start:
+ // %loaded = @load.linked(%addr)
+ // %new = some_op iN %loaded, %incr
+ // %stored = @store_conditional(%new, %addr)
+ // %try_again = icmp ne i32 %stored, 0
+ // br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB =
+ BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place).
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+
+ Value *NewVal = PerformOp(Builder, Loaded);
+
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
+ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return Loaded;
+}
+
+/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
+/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
+/// IR. As a migration step, we convert back to what used to be the standard
+/// way to represent a pointer cmpxchg so that we can update backends one by
+/// one.
+AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+ auto *M = CI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
+ M->getDataLayout());
+
+ IRBuilder<> Builder(CI);
+
+ Value *Addr = CI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
+ Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
+
+
+ auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
+ CI->getSuccessOrdering(),
+ CI->getFailureOrdering(),
+ CI->getSynchScope());
+ NewCI->setVolatile(CI->isVolatile());
+ NewCI->setWeak(CI->isWeak());
+ DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Succ = Builder.CreateExtractValue(NewCI, 1);
+
+ OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
+
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, OldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Succ, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return NewCI;
+}
+
+
+bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+ AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
+ AtomicOrdering FailureOrder = CI->getFailureOrdering();
+ Value *Addr = CI->getPointerOperand();
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+ // If shouldInsertFencesForAtomic() returns true, then the target does not
+ // want to deal with memory orders, and emitLeading/TrailingFence should take
+ // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
+ // should preserve the ordering.
+ bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
+ AtomicOrdering MemOpOrder =
+ ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+
+ // In implementations which use a barrier to achieve release semantics, we can
+ // delay emitting this barrier until we know a store is actually going to be
+ // attempted. The cost of this delay is that we need 2 copies of the block
+ // emitting the load-linked, affecting code size.
+ //
+ // Ideally, this logic would be unconditional except for the minsize check
+ // since in other cases the extra blocks naturally collapse down to the
+ // minimal loop. Unfortunately, this puts too much stress on later
+ // optimisations so we avoid emitting the extra logic in those cases too.
+ bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
+ SuccessOrder != AtomicOrdering::Monotonic &&
+ SuccessOrder != AtomicOrdering::Acquire &&
+ !F->optForMinSize();
+
+ // There's no overhead for sinking the release barrier in a weak cmpxchg, so
+ // do it even on minsize.
+ bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
+
+ // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+ //
+ // The full expansion we produce is:
+ // [...]
+ // cmpxchg.start:
+ // %unreleasedload = @load.linked(%addr)
+ // %should_store = icmp eq %unreleasedload, %desired
+ // br i1 %should_store, label %cmpxchg.fencedstore,
+ // label %cmpxchg.nostore
+ // cmpxchg.fencedstore:
+ // fence?
+ // br label %cmpxchg.trystore
+ // cmpxchg.trystore:
+ // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
+ // [%releasedload, %cmpxchg.releasedload]
+ // %stored = @store_conditional(%new, %addr)
+ // %success = icmp eq i32 %stored, 0
+ // br i1 %success, label %cmpxchg.success,
+ // label %cmpxchg.releasedload/%cmpxchg.failure
+ // cmpxchg.releasedload:
+ // %releasedload = @load.linked(%addr)
+ // %should_store = icmp eq %releasedload, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.failure
+ // cmpxchg.success:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.nostore:
+ // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
+ // [%releasedload,
+ // %cmpxchg.releasedload/%cmpxchg.trystore]
+ // @load_linked_fail_balance()?
+ // br label %cmpxchg.failure
+ // cmpxchg.failure:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.end:
+ // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.trystore]
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
+ auto ReleasedLoadBB =
+ BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
+ auto TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
+ auto ReleasingStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
+ auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
+
+ // This grabs the DebugLoc from CI
+ IRBuilder<> Builder(CI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(StartBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(StartBB);
+ Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore = Builder.CreateICmpEQ(
+ UnreleasedLoad, CI->getCompareOperand(), "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+
+ Builder.SetInsertPoint(ReleasingStoreBB);
+ if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(TryStoreBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ Value *StoreSuccess = TLI->emitStoreConditional(
+ Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ StoreSuccess = Builder.CreateICmpEQ(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : RetryBB);
+
+ Builder.SetInsertPoint(ReleasedLoadBB);
+ Value *SecondLoad;
+ if (HasReleasedLoadBB) {
+ SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
+ "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ } else
+ Builder.CreateUnreachable();
+
+ // Make sure later instructions don't get reordered with a fence if
+ // necessary.
+ Builder.SetInsertPoint(SuccessBB);
+ if (ShouldInsertFencesForAtomic)
+ TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ Builder.SetInsertPoint(NoStoreBB);
+ // In the failing case, where we don't execute the store-conditional, the
+ // target might want to balance out the load-linked with a dedicated
+ // instruction (e.g., on ARM, clearing the exclusive monitor).
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+ Builder.CreateBr(FailureBB);
+
+ Builder.SetInsertPoint(FailureBB);
+ if (ShouldInsertFencesForAtomic)
+ TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
+ // PHI.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // Setup the builder so we can create any PHIs we need.
+ Value *Loaded;
+ if (!HasReleasedLoadBB)
+ Loaded = UnreleasedLoad;
+ else {
+ Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
+ PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+ TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
+ PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
+ NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
+ PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
+ ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
+
+ Loaded = ExitLoaded;
+ }
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+ // We can remove the instructions now we're no longer iterating through them.
+ for (auto EV : PrunedInsts)
+ EV->eraseFromParent();
+
+ if (!CI->use_empty()) {
+ // Some use of the full struct return that we don't understand has happened,
+ // so we've got to reconstruct it properly.
+ Value *Res;
+ Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ }
+
+ CI->eraseFromParent();
+ return true;
+}
+
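+/// Return true if the given atomicrmw can never change the value in memory,
+/// e.g. `add 0`, `sub 0`, `or 0`, `xor 0`, or `and -1`.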
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if(!C)
+ return false;
+
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch(Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ tryExpandAtomicLoad(ResultingLoad);
+ return true;
+ }
+ return false;
+}
+
+Value *AtomicExpand::insertRMWCmpXchgLoop(
+ IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ LLVMContext &Ctx = Builder.getContext();
+ BasicBlock *BB = Builder.GetInsertBlock();
+ Function *F = BB->getParent();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB =
+ BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
+ // Atomics require at least natural alignment.
+ InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal = PerformOp(Builder, Loaded);
+
+ Value *NewLoaded = nullptr;
+ Value *Success = nullptr;
+
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal,
+ MemOpOrder == AtomicOrdering::Unordered
+ ? AtomicOrdering::Monotonic
+ : MemOpOrder,
+ Success, NewLoaded);
+ assert(Success && NewLoaded);
+
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return NewLoaded;
+}
+
+// Note: This function is exposed externally by AtomicExpandUtils.h
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ IRBuilder<> Builder(AI);
+ Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
+ Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
+ [&](IRBuilder<> &Builder, Value *Loaded) {
+ return performAtomicOp(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
+ },
+ CreateCmpXchg);
+
+ AI->replaceAllUsesWith(Loaded);
+ AI->eraseFromParent();
+ return true;
+}
+
+// In order to use one of the sized library calls such as
+// __atomic_fetch_add_4, the alignment must be sufficient, the size
+// must be one of the potentially-specialized sizes, and the value
+// type must actually exist in C on the target (otherwise, the
+// function wouldn't actually be defined.)
+static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
+ const DataLayout &DL) {
+ // TODO: "LargestSize" is an approximation for "largest type that
+ // you can express in C". It seems to be the case that int128 is
+ // supported on all 64-bit platforms, otherwise only up to 64-bit
+ // integers are supported. If we get this wrong, then we'll try to
+ // call a sized libcall that doesn't actually exist. There should
+ // really be some more reliable way in LLVM of determining integer
+ // sizes which are valid in the target's C ABI...
+ unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
+ return Align >= Size &&
+ (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
+ Size <= LargestSize;
+}
+
+void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
+ RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
+}
+
+void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
+ RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
+}
+
+void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
+ I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
+ Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
+}
+
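+// For each operation, index 0 is the generic (unsized) __atomic_* libcall and
+// indices 1-5 are the 1/2/4/8/16-byte specializations; UNKNOWN_LIBCALL marks
+// variants that do not exist.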
+static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
+ static const RTLIB::Libcall LibcallsXchg[6] = {
+ RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
+ RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
+ RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
+ static const RTLIB::Libcall LibcallsAdd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
+ RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
+ RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
+ static const RTLIB::Libcall LibcallsSub[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
+ RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
+ RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
+ static const RTLIB::Libcall LibcallsAnd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
+ RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
+ RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
+ static const RTLIB::Libcall LibcallsOr[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
+ RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
+ RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
+ static const RTLIB::Libcall LibcallsXor[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
+ RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
+ RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
+ static const RTLIB::Libcall LibcallsNand[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
+ RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
+ RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
+
+ switch (Op) {
+ case AtomicRMWInst::BAD_BINOP:
+ llvm_unreachable("Should not have BAD_BINOP.");
+ case AtomicRMWInst::Xchg:
+ return makeArrayRef(LibcallsXchg);
+ case AtomicRMWInst::Add:
+ return makeArrayRef(LibcallsAdd);
+ case AtomicRMWInst::Sub:
+ return makeArrayRef(LibcallsSub);
+ case AtomicRMWInst::And:
+ return makeArrayRef(LibcallsAnd);
+ case AtomicRMWInst::Or:
+ return makeArrayRef(LibcallsOr);
+ case AtomicRMWInst::Xor:
+ return makeArrayRef(LibcallsXor);
+ case AtomicRMWInst::Nand:
+ return makeArrayRef(LibcallsNand);
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // No atomic libcalls are available for max/min/umax/umin.
+ return {};
+ }
+ llvm_unreachable("Unexpected AtomicRMW operation.");
+}
+
+void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
+ ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
+
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool Success = false;
+ if (!Libcalls.empty())
+ Success = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+
+ // The expansion failed: either there were no libcalls at all for
+ // the operation (min/max), or there were only size-specialized
+ // libcalls (add/sub/etc) and we needed a generic. So, expand to a
+ // CAS libcall, via a CAS loop, instead.
+ if (!Success) {
+ expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal,
+ AtomicOrdering MemOpOrder,
+ Value *&Success, Value *&NewLoaded) {
+ // Create the CAS instruction normally...
+ AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ // ...and then expand the CAS into a libcall.
+ expandAtomicCASToLibcall(Pair);
+ });
+ }
+}
+
+// A helper routine for the above expandAtomic*ToLibcall functions.
+//
+// 'Libcalls' contains an array of enum values for the particular
+// ATOMIC libcalls to be emitted. All of the other arguments besides
+// 'I' are extracted from the Instruction subclass by the
+// caller. Depending on the particular call, some will be null.
+bool AtomicExpand::expandAtomicOpToLibcall(
+ Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
+ Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
+ assert(Libcalls.size() == 6);
+
+ LLVMContext &Ctx = I->getContext();
+ Module *M = I->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ IRBuilder<> Builder(I);
+ IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
+
+ bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
+ Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
+
+ unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
+
+ // TODO: the "order" argument type is "int", not int32. So
+ // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
+ ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
+ assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Constant *OrderingVal =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
+ Constant *Ordering2Val = nullptr;
+ if (CASExpected) {
+ assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Ordering2Val =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
+ }
+ bool HasResult = I->getType() != Type::getVoidTy(Ctx);
+
+ RTLIB::Libcall RTLibType;
+ if (UseSizedLibcall) {
+ switch (Size) {
+ case 1: RTLibType = Libcalls[1]; break;
+ case 2: RTLibType = Libcalls[2]; break;
+ case 4: RTLibType = Libcalls[3]; break;
+ case 8: RTLibType = Libcalls[4]; break;
+ case 16: RTLibType = Libcalls[5]; break;
+ }
+ } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
+ RTLibType = Libcalls[0];
+ } else {
+ // Can't use sized function, and there's no generic for this
+ // operation, so give up.
+ return false;
+ }
+
+  // Build up the function call. There are two kinds. First, the sized
+ // variants. These calls are going to be one of the following (with
+ // N=1,2,4,8,16):
+ // iN __atomic_load_N(iN *ptr, int ordering)
+ // void __atomic_store_N(iN *ptr, iN val, int ordering)
+ // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
+ // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
+ // int success_order, int failure_order)
+ //
+ // Note that these functions can be used for non-integer atomic
+  // operations; the values just need to be bitcast to integers on the
+ // way in and out.
+ //
+ // And, then, the generic variants. They look like the following:
+ // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
+ // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
+ // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
+ // int ordering)
+ // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
+ // void *desired, int success_order,
+ // int failure_order)
+ //
+ // The different signatures are built up depending on the
+ // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
+ // variables.
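+  //
+  // As an illustrative (hypothetical) example: an
+  //   atomicrmw add i32* %p, i32 %v seq_cst
+  // for which a sized libcall exists is lowered to roughly
+  //   i32 __atomic_fetch_add_4(i8* %p, i32 %v, int 5 /* __ATOMIC_SEQ_CST */)
+  // whereas an atomicrmw with no usable sized or generic libcall is instead
+  // expanded through the CAS loop in expandAtomicRMWToLibcall above.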
+
+ AllocaInst *AllocaCASExpected = nullptr;
+ Value *AllocaCASExpected_i8 = nullptr;
+ AllocaInst *AllocaValue = nullptr;
+ Value *AllocaValue_i8 = nullptr;
+ AllocaInst *AllocaResult = nullptr;
+ Value *AllocaResult_i8 = nullptr;
+
+ Type *ResultTy;
+ SmallVector<Value *, 6> Args;
+ AttributeSet Attr;
+
+ // 'size' argument.
+ if (!UseSizedLibcall) {
+ // Note, getIntPtrType is assumed equivalent to size_t.
+ Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
+ }
+
+ // 'ptr' argument.
+ Value *PtrVal =
+ Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
+ Args.push_back(PtrVal);
+
+ // 'expected' argument, if present.
+ if (CASExpected) {
+ AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
+ AllocaCASExpected->setAlignment(AllocaAlignment);
+ AllocaCASExpected_i8 =
+ Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
+ Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
+ Args.push_back(AllocaCASExpected_i8);
+ }
+
+ // 'val' argument ('desired' for cas), if present.
+ if (ValueOperand) {
+ if (UseSizedLibcall) {
+ Value *IntValue =
+ Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
+ Args.push_back(IntValue);
+ } else {
+ AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
+ AllocaValue->setAlignment(AllocaAlignment);
+ AllocaValue_i8 =
+ Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
+ Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
+ Args.push_back(AllocaValue_i8);
+ }
+ }
+
+ // 'ret' argument.
+ if (!CASExpected && HasResult && !UseSizedLibcall) {
+ AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
+ AllocaResult->setAlignment(AllocaAlignment);
+ AllocaResult_i8 =
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
+ Args.push_back(AllocaResult_i8);
+ }
+
+ // 'ordering' ('success_order' for cas) argument.
+ Args.push_back(OrderingVal);
+
+ // 'failure_order' argument, if present.
+ if (Ordering2Val)
+ Args.push_back(Ordering2Val);
+
+ // Now, the return type.
+ if (CASExpected) {
+ ResultTy = Type::getInt1Ty(Ctx);
+ Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
+ } else if (HasResult && UseSizedLibcall)
+ ResultTy = SizedIntTy;
+ else
+ ResultTy = Type::getVoidTy(Ctx);
+
+ // Done with setting up arguments and return types, create the call:
+ SmallVector<Type *, 6> ArgTys;
+ for (Value *Arg : Args)
+ ArgTys.push_back(Arg->getType());
+ FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
+ Constant *LibcallFn =
+ M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
+ CallInst *Call = Builder.CreateCall(LibcallFn, Args);
+ Call->setAttributes(Attr);
+ Value *Result = Call;
+
+ // And then, extract the results...
+ if (ValueOperand && !UseSizedLibcall)
+ Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
+
+ if (CASExpected) {
+ // The final result from the CAS is {load of 'expected' alloca, bool result
+ // from call}
+ Type *FinalResultTy = I->getType();
+ Value *V = UndefValue::get(FinalResultTy);
+ Value *ExpectedOut =
+ Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
+ Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
+ V = Builder.CreateInsertValue(V, ExpectedOut, 0);
+ V = Builder.CreateInsertValue(V, Result, 1);
+ I->replaceAllUsesWith(V);
+ } else if (HasResult) {
+ Value *V;
+ if (UseSizedLibcall)
+ V = Builder.CreateBitOrPointerCast(Result, I->getType());
+ else {
+ V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
+ Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
+ }
+ I->replaceAllUsesWith(V);
+ }
+ I->eraseFromParent();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
new file mode 100644
index 000000000000..a67e194356d8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -0,0 +1,38 @@
+//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the implementation of a basic TargetTransformInfo pass
+/// predicated on the target abstractions present in the target independent
+/// code generator. It uses these (primarily TargetLowering) to model as much
+/// of the TTI query interface as possible. It is included by most targets so
+/// that they can specialize only a small subset of the query space.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "basictti"
+
+// This flag is used by the template base class for BasicTTIImpl, and here to
+// provide a definition.
+cl::opt<unsigned>
+ llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+ cl::desc("Threshold for partial unrolling"),
+ cl::Hidden);
+
+BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 000000000000..fa705761645f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1919 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBB's, which
+// it then removes.
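+// For example (illustrative): if bb0 conditionally branches to bb1 and bb1
+// contains only an unconditional branch to bb2, bb0 is rewritten to branch
+// directly to bb2, which may leave bb1 dead.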
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form. It also must handle virtual registers for targets that emit a
+// virtual ISA (e.g. NVPTX).
+//
+//===----------------------------------------------------------------------===//
+
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "branchfolding"
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge , "Number of block tails merged");
+STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+ cl::desc("Max number of predecessors to consider tail merging"),
+ cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char BranchFolderPass::ID = 0;
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
+
+INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+ "Control Flow Optimizer", false, false)
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // TailMerge can create jump into if branches that make CFG irreducible for
+ // HW that requires structurized CFG.
+ bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge();
+ BranchFolder::MBFIWrapper MBBFreqInfo(
+ getAnalysis<MachineBlockFrequencyInfo>());
+ BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
+ getAnalysis<MachineBranchProbabilityInfo>());
+ return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
+ MF.getSubtarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ MBFIWrapper &FreqInfo,
+ const MachineBranchProbabilityInfo &ProbInfo)
+ : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
+ MBPI(ProbInfo) {
+ switch (FlagEnableTailMerge) {
+ case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_TRUE: EnableTailMerge = true; break;
+ case cl::BOU_FALSE: EnableTailMerge = false; break;
+ }
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ MachineFunction *MF = MBB->getParent();
+ // drop all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end()-1);
+
+ // Avoid matching if this pointer gets reused.
+ TriedMerging.erase(MBB);
+
+ // Remove the block.
+ MF->erase(MBB);
+ FuncletMembership.erase(MBB);
+ if (MLI)
+ MLI->removeBlock(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+/// r0 = implicit_def
+/// r1 = implicit_def
+/// br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+ SmallSet<unsigned, 4> ImpDefRegs;
+ MachineBasicBlock::iterator I = MBB->begin();
+ while (I != MBB->end()) {
+ if (!I->isImplicitDef())
+ break;
+ unsigned Reg = I->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ ImpDefRegs.insert(*SubRegs);
+ } else {
+ ImpDefRegs.insert(Reg);
+ }
+ ++I;
+ }
+ if (ImpDefRegs.empty())
+ return false;
+
+ MachineBasicBlock::iterator FirstTerm = I;
+ while (I != MBB->end()) {
+ if (!TII->isUnpredicatedTerminator(*I))
+ return false;
+ // See if it uses any of the implicitly defined registers.
+ for (const MachineOperand &MO : I->operands()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (ImpDefRegs.count(Reg))
+ return false;
+ }
+ ++I;
+ }
+
+ I = MBB->begin();
+ while (I != FirstTerm) {
+ MachineInstr *ImpDefMI = &*I;
+ ++I;
+ MBB->erase(ImpDefMI);
+ }
+
+ return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function. Block placement changes the layout
+/// and may create new tail merging opportunities.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ MachineModuleInfo *mmi,
+ MachineLoopInfo *mli, bool AfterPlacement) {
+ if (!tii) return false;
+
+ TriedMerging.clear();
+
+ AfterBlockPlacement = AfterPlacement;
+ TII = tii;
+ TRI = tri;
+ MMI = mmi;
+ MLI = mli;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
+ if (!UpdateLiveIns)
+ MRI.invalidateLiveness();
+
+ // Fix CFG. The later algorithms expect it to be right.
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
+ MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+ MadeChange |= OptimizeImpDefsBlock(&MBB);
+ }
+
+ // Recalculate funclet membership.
+ FuncletMembership = getFuncletMembership(MF);
+
+ bool MadeChangeThisIteration = true;
+ while (MadeChangeThisIteration) {
+ MadeChangeThisIteration = TailMergeBlocks(MF);
+ // No need to clean up if tail merging does not change anything after the
+ // block placement.
+ if (!AfterBlockPlacement || MadeChangeThisIteration)
+ MadeChangeThisIteration |= OptimizeBranches(MF);
+ if (EnableHoistCommonCode)
+ MadeChangeThisIteration |= HoistCommonCode(MF);
+ MadeChange |= MadeChangeThisIteration;
+ }
+
+ // See if any jump tables have become dead as the code generator
+ // did its thing.
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (!JTI)
+ return MadeChange;
+
+ // Walk the function to find jump tables that are live.
+ BitVector JTIsLive(JTI->getJumpTables().size());
+ for (const MachineBasicBlock &BB : MF) {
+ for (const MachineInstr &I : BB)
+ for (const MachineOperand &Op : I.operands()) {
+ if (!Op.isJTI()) continue;
+
+ // Remember that this JT is live.
+ JTIsLive.set(Op.getIndex());
+ }
+ }
+
+ // Finally, remove dead jump tables. This happens when the
+ // indirect jump was unreachable (and thus deleted).
+ for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+ if (!JTIsLive.test(i)) {
+ JTI->RemoveJumpTable(i);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
+static unsigned HashMachineInstr(const MachineInstr &MI) {
+ unsigned Hash = MI.getOpcode();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI.getOperand(i);
+
+ // Merge in bits from the operand if easy. We can't use MachineOperand's
+ // hash_code here because it's not deterministic and we sort by hash value
+ // later.
+ unsigned OperandHash = 0;
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register:
+ OperandHash = Op.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ OperandHash = Op.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OperandHash = Op.getMBB()->getNumber();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ OperandHash = Op.getIndex();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ // Global address / external symbol are too hard, don't bother, but do
+ // pull in the offset.
+ OperandHash = Op.getOffset();
+ break;
+ default:
+ break;
+ }
+
+ Hash += ((OperandHash << 3) | Op.getType()) << (i & 31);
+ }
+ return Hash;
+}
+
+/// HashEndOfMBB - Hash the last instruction in the MBB.
+static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return 0;
+
+ return HashMachineInstr(*I);
+}
+
+/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
+/// of instructions they actually have in common at their end. Return
+/// iterators for the first shared instruction in each block.
+static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2) {
+ I1 = MBB1->end();
+ I2 = MBB2->end();
+
+ unsigned TailLen = 0;
+ while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
+ --I1; --I2;
+ // Skip debugging pseudos; necessary to avoid changing the code.
+ while (I1->isDebugValue()) {
+ if (I1==MBB1->begin()) {
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin())
+ // I1==DBG at begin; I2==DBG at begin
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
+ return TailLen;
+ }
+ --I1;
+ }
+ // I1==first (untested) non-DBG preceding known match
+ while (I2->isDebugValue()) {
+ if (I2==MBB2->begin()) {
+ ++I1;
+ // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
+ return TailLen;
+ }
+ --I2;
+ }
+ // I1, I2==first (untested) non-DBGs preceding known match
+ if (!I1->isIdenticalTo(*I2) ||
+ // FIXME: This check is dubious. It's used to get around a problem where
+ // people incorrectly expect inline asm directives to remain in the same
+ // relative order. This is untenable because normal compiler
+ // optimizations (like this one) may reorder and/or merge these
+ // directives.
+ I1->isInlineAsm()) {
+ ++I1; ++I2;
+ break;
+ }
+ ++TailLen;
+ }
+ // Back past possible debugging pseudos at beginning of block. This matters
+ // when one block differs from the other only by whether debugging pseudos
+ // are present at the beginning. (This way, the various checks later for
+ // I1==MBB1->begin() work as expected.)
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
+ --I2;
+ while (I2->isDebugValue()) {
+ if (I2 == MBB2->begin())
+ return TailLen;
+ --I2;
+ }
+ ++I2;
+ }
+ if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
+ --I1;
+ while (I1->isDebugValue()) {
+ if (I1 == MBB1->begin())
+ return TailLen;
+ --I1;
+ }
+ ++I1;
+ }
+ return TailLen;
+}
+
+void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) {
+ if (!UpdateLiveIns)
+ return;
+
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ LiveRegs.stepBackward(MI);
+
+ for (unsigned Reg : LiveRegs) {
+ // Skip the register if we are about to add one of its super registers.
+ bool ContainsSuperReg = false;
+ for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) {
+ if (LiveRegs.contains(*SReg)) {
+ ContainsSuperReg = true;
+ break;
+ }
+ }
+ if (ContainsSuperReg)
+ continue;
+ MBB.addLiveIn(Reg);
+ }
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest) {
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ computeLiveIns(*NewDest);
+
+ ++NumTailMerge;
+}
+
+/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
+/// MBB so that the part before the iterator falls through into the part
+/// starting at the iterator, which is moved into a new block. This returns the
+/// new MBB.
+MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1,
+ const BasicBlock *BB) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return nullptr;
+
+ MachineFunction &MF = *CurMBB.getParent();
+
+ // Create the fall-through block.
+ MachineFunction::iterator MBBI = CurMBB.getIterator();
+  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB);
+ CurMBB.getParent()->insert(++MBBI, NewMBB);
+
+ // Move all the successors of this block to the specified block.
+ NewMBB->transferSuccessors(&CurMBB);
+
+ // Add an edge from CurMBB to NewMBB for the fall-through.
+ CurMBB.addSuccessor(NewMBB);
+
+ // Splice the code over.
+ NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+
+ // NewMBB belongs to the same loop as CurMBB.
+ if (MLI)
+ if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
+ ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
+
+ // NewMBB inherits CurMBB's block frequency.
+ MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
+
+ computeLiveIns(*NewMBB);
+
+ // Add the new block to the funclet.
+ const auto &FuncletI = FuncletMembership.find(&CurMBB);
+ if (FuncletI != FuncletMembership.end()) {
+ auto n = FuncletI->second;
+ FuncletMembership[NewMBB] = n;
+ }
+
+ return NewMBB;
+}
+
+/// EstimateRuntime - Make a rough estimate of how long it will take to run
+/// the specified code.
+static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ unsigned Time = 0;
+ for (; I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ if (I->isCall())
+ Time += 10;
+ else if (I->mayLoad() || I->mayStore())
+ Time += 2;
+ else
+ ++Time;
+ }
+ return Time;
+}
+
+// CurMBB needs to add an unconditional branch to SuccBB (we removed these
+// branches temporarily for tail merging). In the case where CurMBB ends
+// with a conditional branch to the next block, optimize by reversing the
+// test and conditionally branching to SuccBB instead.
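+// For example (illustrative): instead of re-inserting 'Bcc NextBB; B SuccBB',
+// the test is reversed to 'Bncc SuccBB', letting CurMBB fall through to
+// NextBB.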
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = CurMBB->getParent();
+ MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
+ if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ MachineBasicBlock *NextBB = &*I;
+ if (TBB == NextBB && !Cond.empty() && !FBB) {
+ if (!TII->ReverseBranchCondition(Cond)) {
+ TII->RemoveBranch(*CurMBB);
+ TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
+ return;
+ }
+ }
+ }
+ TII->InsertBranch(*CurMBB, SuccBB, nullptr,
+ SmallVector<MachineOperand, 0>(), dl);
+}
+
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ if (getHash() > o.getHash())
+ return false;
+ if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
+#ifndef _GLIBCXX_DEBUG
+ llvm_unreachable("Predecessor appears twice");
+#else
+ return false;
+#endif
+}
+
+BlockFrequency
+BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ if (I != MergedBBFreq.end())
+ return I->second;
+
+ return MBFI.getBlockFreq(MBB);
+}
+
+void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
+ BlockFrequency F) {
+ MergedBBFreq[MBB] = F;
+}
+
+raw_ostream &
+BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
+}
+
+raw_ostream &
+BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI.printBlockFreq(OS, Freq);
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
+ }
+ --I;
+ if (!I->isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
+}
+
+/// ProfitableToMerge - Check if two machine basic blocks have a common tail
+/// and decide if it would be profitable to merge those tails. Return the
+/// length of the common tail and iterators to the first common instruction
+/// in each block.
+static bool
+ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
+ unsigned minCommonTailLength, unsigned &CommonTailLen,
+ MachineBasicBlock::iterator &I1,
+ MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership) {
+ // It is never profitable to tail-merge blocks from two different funclets.
+ if (!FuncletMembership.empty()) {
+ auto Funclet1 = FuncletMembership.find(MBB1);
+ assert(Funclet1 != FuncletMembership.end());
+ auto Funclet2 = FuncletMembership.find(MBB2);
+ assert(Funclet2 != FuncletMembership.end());
+ if (Funclet1->second != Funclet2->second)
+ return false;
+ }
+
+ CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
+ if (CommonTailLen == 0)
+ return false;
+ DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber()
+ << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen
+ << '\n');
+
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ MachineFunction *MF = MBB1->getParent();
+ return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() &&
+ (I1 == MBB1->begin() || I2 == MBB2->begin());
+}
+
+/// ComputeSameTails - Look through all the blocks in MergePotentials that have
+/// hash CurHash (guaranteed to match the last element). Build the vector
+/// SameTails of all those that have the (same) largest number of instructions
+/// in common of any pair of these blocks. SameTails entries contain an
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// instruction where the matching code sequence begins.
+/// Order of elements in SameTails is the reverse of the order in which
+/// those blocks appear in MergePotentials (where they are not necessarily
+/// consecutive).
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ unsigned maxCommonTailLength = 0U;
+ SameTails.clear();
+ MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
+ MPIterator HighestMPIter = std::prev(MergePotentials.end());
+ for (MPIterator CurMPIter = std::prev(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {
+ for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
+ unsigned CommonTailLen;
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB,
+ FuncletMembership)) {
+ if (CommonTailLen > maxCommonTailLength) {
+ SameTails.clear();
+ maxCommonTailLength = CommonTailLen;
+ HighestMPIter = CurMPIter;
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
+ }
+ if (HighestMPIter == CurMPIter &&
+ CommonTailLen == maxCommonTailLength)
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
+ }
+ if (I == B)
+ break;
+ }
+ }
+ return maxCommonTailLength;
+}
+
+/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
+/// MergePotentials, restoring branches at ends of blocks as appropriate.
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ MPIterator CurMPIter, B;
+ for (CurMPIter = std::prev(MergePotentials.end()),
+ B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash; --CurMPIter) {
+ // Put the unconditional branch back, if we need one.
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
+ if (SuccBB && CurMBB != PredBB)
+ FixTail(CurMBB, SuccBB, TII);
+ if (CurMPIter == B)
+ break;
+ }
+ if (CurMPIter->getHash() != CurHash)
+ CurMPIter++;
+ MergePotentials.erase(CurMPIter, MergePotentials.end());
+}
+
+/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
+/// only of the common tail. Create a block that does by splitting one.
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ MachineBasicBlock *SuccBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
+ unsigned TimeEstimate = ~0U;
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ // Use PredBB if possible; that doesn't require a new branch.
+ if (SameTails[i].getBlock() == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ // Otherwise, make a (fairly bogus) choice based on estimate of
+ // how long it will take the various blocks to execute.
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
+ TimeEstimate = t;
+ commonTailIndex = i;
+ }
+ }
+
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // If the common tail includes any debug info we will take it pretty
+ // randomly from one of the inputs. Might be better to remove it?
+ DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
+ << maxCommonTailLength);
+
+ // If the split block unconditionally falls-thru to SuccBB, it will be
+ // merged. In control flow terms it should then take SuccBB's name. e.g. If
+ // SuccBB is an inner loop, the common tail is still part of the inner loop.
+ const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ?
+ SuccBB->getBasicBlock() : MBB->getBasicBlock();
+ MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
+ // If we split PredBB, newMBB is the new predecessor.
+ if (PredBB == MBB)
+ PredBB = newMBB;
+
+ return true;
+}
+
+static void
+mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
+ // Merge MMOs from memory operations in the common block.
+ MachineBasicBlock *MBB = MBBIStartPos->getParent();
+  // Note that CommonTailLen does not necessarily match the size of
+  // the common BB nor all its instructions, because of differences
+  // in debug instructions.
+ unsigned CommonTailLen = 0;
+ for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
+ ++CommonTailLen;
+
+ MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
+ MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
+ MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
+ MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
+
+ while (CommonTailLen--) {
+ assert(MBBI != MBBIE && "Reached BB end within common tail length!");
+ (void)MBBIE;
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+
+ while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
+ ++MBBICommon;
+
+ assert(MBBICommon != MBBIECommon &&
+ "Reached BB end within common tail length!");
+ assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
+
+ if (MBBICommon->mayLoad() || MBBICommon->mayStore())
+ MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+
+ ++MBBI;
+ ++MBBICommon;
+ }
+}
+
+// See if any of the blocks in MergePotentials (which all have SuccBB as a
+// successor, or all have no successor if it is null) can be tail-merged.
+// If there is a successor, any blocks in MergePotentials that are not
+// tail-merged and are not immediately before Succ must have an unconditional
+// branch to Succ added (but the predecessor/successor lists need no
+// adjustment). The lone predecessor of Succ that falls through into Succ,
+// if any, is given in PredBB.
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
+ bool MadeChange = false;
+
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ dbgs() << "\n";
+ if (SuccBB) {
+ dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ dbgs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ dbgs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
+
+ // Sort by hash value so that blocks with identical end sequences sort
+ // together.
+ array_pod_sort(MergePotentials.begin(), MergePotentials.end());
+
+ // Walk through equivalence sets looking for actual exact matches.
+ while (MergePotentials.size() > 1) {
+ unsigned CurHash = MergePotentials.back().getHash();
+
+ // Build SameTails, identifying the set of blocks with this hash code
+ // and with the maximum number of instructions in common.
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
+
+ // If we didn't find any pair that has at least minCommonTailLength
+ // instructions in common, remove all blocks with this hash code and retry.
+ if (SameTails.empty()) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+
+ // If one of the blocks is the entire common tail (and not the entry
+ // block, which we can't jump to), we can treat all blocks with this same
+ // tail at once. Use PredBB if that is one of the possibilities, as that
+ // will not introduce any extra branches.
+ MachineBasicBlock *EntryBB =
+ &MergePotentials.front().getBlock()->getParent()->front();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
+ break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
+ }
+ }
+
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
+ // None of the blocks consist entirely of the common tail.
+ // Split a block so that one does.
+ if (!CreateCommonTailOnlyBlock(PredBB, SuccBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
+ }
+
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // Recompute common tail MBB's edge weights and block frequency.
+ setCommonTailEdgeWeights(*MBB);
+
+ // MBB is common tail. Adjust all other BB's to jump to this one.
+ // Traversal must be forwards so erases work.
+ DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
+ continue;
+ DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
+ // Merge MMOs from memory operations as needed.
+ mergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
+ // Hack the end off BB i, making it jump to BB commonTailIndex instead.
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
+ // BB i is no longer a predecessor of SuccBB; remove it from the worklist.
+ MergePotentials.erase(SameTails[i].getMPIter());
+ }
+ DEBUG(dbgs() << "\n");
+ // We leave commonTailIndex in the worklist in case there are other blocks
+ // that match it with a smaller number of instructions.
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+ if (!EnableTailMerge) return MadeChange;
+
+ // First find blocks with no successors.
+ // Block placement does not create new tail merging opportunities for these
+ // blocks.
+ if (!AfterBlockPlacement) {
+ MergePotentials.clear();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ }
+
+ // Look at blocks (IBB) with multiple predecessors (PBB).
+ // We change each predecessor to a canonical form, by
+ // (1) temporarily removing any unconditional branch from the predecessor
+ // to IBB, and
+ // (2) alter conditional branches so they branch to the other block
+ // not IBB; this may require adding back an unconditional branch to IBB
+ // later, where there wasn't one coming in. E.g.
+ // Bcc IBB
+ // fallthrough to QBB
+ // here becomes
+ // Bncc QBB
+ // with a conceptual B to IBB after that, which never actually exists.
+ // With those changes, we see whether the predecessors' tails match,
+ // and merge them if so. We change things out of canonical form and
+ // back to the way they were later in the process. (OptimizeBranches
+ // would undo some of this, but we can't use it, because we'd get into
+ // a compile-time infinite loop repeatedly doing and undoing the same
+ // transformations.)
+
+ for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ if (I->pred_size() < 2) continue;
+ SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
+ MachineBasicBlock *IBB = &*I;
+ MachineBasicBlock *PredBB = &*std::prev(I);
+ MergePotentials.clear();
+ for (MachineBasicBlock *PBB : I->predecessors()) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+
+ if (TriedMerging.count(PBB))
+ continue;
+
+      // Skip blocks that loop to themselves; we can't tail merge these.
+ if (PBB == IBB)
+ continue;
+
+ // Visit each predecessor only once.
+ if (!UniquePreds.insert(PBB).second)
+ continue;
+
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->hasEHPadSuccessor())
+ continue;
+
+ // Bail out if the loop header (IBB) is not the top of the loop chain
+ // after the block placement. Otherwise, the common tail of IBB's
+ // predecessors may become the loop top if block placement is called again
+ // and the predecessors may branch to this common tail.
+      // FIXME: Relax this check if the algorithm for finding the loop top is
+      // changed in MBP.
+ if (AfterBlockPlacement && MLI)
+ if (MachineLoop *ML = MLI->getLoopFor(IBB))
+ if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
+ continue;
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) {
+ // Failing case: IBB is the target of a cbr, and we cannot reverse the
+ // branch.
+ SmallVector<MachineOperand, 4> NewCond(Cond);
+ if (!Cond.empty() && TBB == IBB) {
+ if (TII->ReverseBranchCondition(NewCond))
+ continue;
+ // This is the QBB case described above
+ if (!FBB) {
+ auto Next = ++PBB->getIterator();
+ if (Next != MF.end())
+ FBB = &*Next;
+ }
+ }
+
+ // Failing case: the only way IBB can be reached from PBB is via
+ // exception handling. Happens for landing pads. Would be nice to have
+ // a bit in the edge so we didn't have to do all this.
+ if (IBB->isEHPad()) {
+ MachineFunction::iterator IP = ++PBB->getIterator();
+ MachineBasicBlock *PredNextBB = nullptr;
+ if (IP != MF.end())
+ PredNextBB = &*IP;
+ if (!TBB) {
+ if (IBB != PredNextBB) // fallthrough
+ continue;
+ } else if (FBB) {
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
+ continue;
+ } else if (Cond.empty()) {
+ if (TBB != IBB) // ubr
+ continue;
+ } else {
+ if (TBB != IBB && IBB != PredNextBB) // cbr
+ continue;
+ }
+ }
+
+ // Remove the unconditional branch at the end, if any.
+ if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*PBB);
+ if (!Cond.empty())
+ // reinsert conditional branch only, for now
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
+ NewCond, dl);
+ }
+
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB));
+ }
+ }
+
+ // If this is a large problem, avoid visiting the same basic blocks multiple
+ // times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+
+ // Reinsert an unconditional branch if needed. The 1 below can occur as a
+ // result of removing blocks in TryTailMergeBlocks.
+ PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
+ }
+
+ return MadeChange;
+}
+
+void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
+ SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
+ BlockFrequency AccumulatedMBBFreq;
+
+ // Aggregate edge frequency of successor edge j:
+ // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
+ // where bb is a basic block that is in SameTails.
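+  // For example (illustrative numbers): with two merged predecessors of
+  // frequencies 8 and 4, each reaching successor j with probability 1/2,
+  // edgeFreq(j) = 8*1/2 + 4*1/2 = 6.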
+ for (const auto &Src : SameTails) {
+ const MachineBasicBlock *SrcMBB = Src.getBlock();
+ BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
+ AccumulatedMBBFreq += BlockFreq;
+
+    // It is not necessary to recompute edge weights if TailMBB has fewer than
+    // two successors.
+ if (TailMBB.succ_size() <= 1)
+ continue;
+
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq)
+ *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI);
+ }
+
+ MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq);
+
+ if (TailMBB.succ_size() <= 1)
+ return;
+
+ auto SumEdgeFreq =
+ std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0))
+ .getFrequency();
+ auto EdgeFreq = EdgeFreqLs.begin();
+
+ if (SumEdgeFreq > 0) {
+ for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end();
+ SuccI != SuccE; ++SuccI, ++EdgeFreq) {
+ auto Prob = BranchProbability::getBranchProbability(
+ EdgeFreq->getFrequency(), SumEdgeFreq);
+ TailMBB.setSuccProbability(SuccI, Prob);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Optimization
+//===----------------------------------------------------------------------===//
+
+bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
+ bool MadeChange = false;
+
+  // Make sure blocks are numbered in order.
+ MF.RenumberBlocks();
+ // Renumbering blocks alters funclet membership, recalculate it.
+ FuncletMembership = getFuncletMembership(MF);
+
+ for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
+ I != E; ) {
+ MachineBasicBlock *MBB = &*I++;
+ MadeChange |= OptimizeBlock(MBB);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+
+ return MadeChange;
+}
+
+// Blocks should be considered empty if they contain only debug info;
+// else the debug info would affect codegen.
+static bool IsEmptyBlock(MachineBasicBlock *MBB) {
+ return MBB->getFirstNonDebugInstr() == MBB->end();
+}
+
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+ MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr();
+ assert(I != MBB->end() && "empty block!");
+ return I->isBranch();
+}
+
+/// IsBetterFallthrough - Return true if it would be clearly better to
+/// fall-through to MBB1 than to fall through into MBB2. This has to return
+/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
+/// result in infinite loops.
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ // Right now, we use a simple heuristic. If MBB2 ends with a call, and
+ // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
+ // optimize branches that branch to either a return block or an assert block
+ // into a fallthrough to the return.
+ MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr();
+ MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr();
+ if (MBB1I == MBB1->end() || MBB2I == MBB2->end())
+ return false;
+
+ // If there is a clear successor ordering we make sure that one block
+ // will fall through to the next
+ if (MBB1->isSuccessor(MBB2)) return true;
+ if (MBB2->isSuccessor(MBB1)) return false;
+
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions on the block.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I != MBB.end() && I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
+}
+
+/// OptimizeBlock - Analyze and optimize control flow related to the specified
+/// block. This is never called on the entry block.
+bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
+
+ MachineFunction::iterator FallThrough = MBB->getIterator();
+ ++FallThrough;
+
+ // Make sure MBB and FallThrough belong to the same funclet.
+ bool SameFunclet = true;
+ if (!FuncletMembership.empty() && FallThrough != MF.end()) {
+ auto MBBFunclet = FuncletMembership.find(MBB);
+ assert(MBBFunclet != FuncletMembership.end());
+ auto FallThroughFunclet = FuncletMembership.find(&*FallThrough);
+ assert(FallThroughFunclet != FuncletMembership.end());
+ SameFunclet = MBBFunclet->second == FallThroughFunclet->second;
+ }
+
+ // If this block is empty, make everyone use its fall-through, not the block
+ // explicitly. Landing pads should not do this since the landing-pad table
+ // points to this block. Blocks with their addresses taken shouldn't be
+ // optimized away.
+ if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() &&
+ SameFunclet) {
+ // Dead block? Leave for cleanup later.
+ if (MBB->pred_empty()) return MadeChange;
+
+ if (FallThrough == MF.end()) {
+ // TODO: Simplify preds to not branch here if possible!
+ } else if (FallThrough->isEHPad()) {
+      // Don't rewrite to a landing pad fallthrough. That could lead to the case
+ // where a BB jumps to more than one landing pad.
+ // TODO: Is it ever worth rewriting predecessors which don't already
+ // jump to a landing pad, and so can safely jump to the fallthrough?
+ } else if (MBB->isSuccessor(&*FallThrough)) {
+ // Rewrite all predecessors of the old block to go to the fallthrough
+ // instead.
+ while (!MBB->pred_empty()) {
+ MachineBasicBlock *Pred = *(MBB->pred_end()-1);
+ Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);
+ }
+ // If MBB was the target of a jump table, update jump tables to go to the
+ // fallthrough instead.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough);
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
+
+ // Check to see if we can simplify the terminator of the block before this
+ // one.
+ MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));
+
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ if (!PriorUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
+ !PriorCond.empty());
+
+ // If the previous branch is conditional and both conditions go to the same
+ // destination, remove the branch, replacing it with an unconditional one or
+ // a fall-through.
+ if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ PriorCond.clear();
+ if (PriorTBB != MBB)
+ TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
+ DEBUG(dbgs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ // Remove redundant DBG_VALUEs first.
+ if (PrevBB.begin() != PrevBB.end()) {
+ MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
+ --PrevBBIter;
+ MachineBasicBlock::iterator MBBIter = MBB->begin();
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // DBG_VALUE at the beginning of MBB.
+ while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
+ && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
+ if (!MBBIter->isIdenticalTo(*PrevBBIter))
+ break;
+ MachineInstr &DuplicateDbg = *MBBIter;
+          ++MBBIter; --PrevBBIter;
+ DuplicateDbg.eraseFromParent();
+ }
+ }
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
+ // If the previous branch *only* branches to *this* block (conditional or
+ // not) remove the branch.
+ if (PriorTBB == MBB && !PriorFBB) {
+ TII->RemoveBranch(PrevBB);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches somewhere else on the condition and here if
+ // the condition is false, remove the uncond second branch.
+ if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+
+ // If the prior block branches here on true and somewhere else on false, and
+ // if the branch condition is reversible, reverse the branch to create a
+ // fall-through.
+ if (PriorTBB == MBB) {
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this block has no successors (e.g. it is a return block or ends with
+ // a call to a no-return function like abort or __cxa_throw) and if the pred
+ // falls through into this block, and if it would otherwise fall through
+ // into the block after this, move this block to the end of the function.
+ //
+ // We consider it more likely that execution will stay in the function (e.g.
+    // due to loops) than it is to exit it. This helps with asserts in loops
+    // etc., moving the assert condition out of the loop body.
+ if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&
+ MachineFunction::iterator(PriorTBB) == FallThrough &&
+ !MBB->canFallThrough()) {
+ bool DoTransform = true;
+
+ // We have to be careful that the succs of PredBB aren't both no-successor
+ // blocks. If neither have successors and if PredBB is the second from
+ // last block in the function, we'd just keep swapping the two blocks for
+ // last. Only do the swap if one is clearly better to fall through than
+ // the other.
+ if (FallThrough == --MF.end() &&
+ !IsBetterFallthrough(PriorTBB, MBB))
+ DoTransform = false;
+
+ if (DoTransform) {
+ // Reverse the branch so we will fall through on the previous true cond.
+ SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
+ if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DEBUG(dbgs() << "\nMoving MBB: " << *MBB
+ << "To make fallthrough to: " << *PriorTBB << "\n");
+
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
+
+ // Move this block to the end of the function.
+ MBB->moveAfter(&MF.back());
+ MadeChange = true;
+ ++NumBranchOpts;
+ return MadeChange;
+ }
+ }
+ }
+ }
+
+ // Analyze the branch in the current block.
+ MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
+ SmallVector<MachineOperand, 4> CurCond;
+ bool CurUnAnalyzable =
+ TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
+ if (!CurUnAnalyzable) {
+ // If the CFG for the prior block has extra edges, remove them.
+ MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
+
+ // If this is a two-way branch, and the FBB branches to this block, reverse
+ // the condition so the single-basic-block loop is faster. Instead of:
+ // Loop: xxx; jcc Out; jmp Loop
+ // we want:
+ // Loop: xxx; jncc Loop; jmp Out
+ if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
+ SmallVector<MachineOperand, 4> NewCond(CurCond);
+ if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ TII->RemoveBranch(*MBB);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // If this branch is the only thing in its block, see if we can forward
+ // other blocks across it.
+ if (CurTBB && CurCond.empty() && !CurFBB &&
+ IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
+ !MBB->hasAddressTaken() && !MBB->isEHPad()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
+ // This block may contain just an unconditional branch. Because there can
+ // be 'non-branch terminators' in the block, try removing the branch and
+ // then seeing if the block is empty.
+ TII->RemoveBranch(*MBB);
+ // If the only things remaining in the block are debug info, remove these
+ // as well, so this will behave the same as an empty block in non-debug
+ // mode.
+ if (IsEmptyBlock(MBB)) {
+ // Make the block empty, losing the debug info (we could probably
+ // improve this in some cases.)
+ MBB->erase(MBB->begin(), MBB->end());
+ }
+ // If this block is just an unconditional branch to CurTBB, we can
+ // usually completely eliminate the block. The only case we cannot
+ // completely eliminate the block is when the block before this one
+ // falls through into MBB and we can't understand the prior block's branch
+ // condition.
+ if (MBB->empty()) {
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
+ if (PredHasNoFallThrough || !PriorUnAnalyzable ||
+ !PrevBB.isSuccessor(MBB)) {
+ // If the prior block falls through into us, turn it into an
+ // explicit branch to us to make updates simpler.
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ PriorTBB != MBB && PriorFBB != MBB) {
+ if (!PriorTBB) {
+ assert(PriorCond.empty() && !PriorFBB &&
+ "Bad branch analysis");
+ PriorTBB = MBB;
+ } else {
+ assert(!PriorFBB && "Machine CFG out of date!");
+ PriorFBB = MBB;
+ }
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
+ TII->RemoveBranch(PrevBB);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+ }
+
+ // Iterate through all the predecessors, revectoring each in turn.
+ size_t PI = 0;
+ bool DidChange = false;
+ bool HasBranchToSelf = false;
+ while (PI != MBB->pred_size()) {
+ MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI);
+ if (PMBB == MBB) {
+ // If this block has an uncond branch to itself, leave it.
+ ++PI;
+ HasBranchToSelf = true;
+ } else {
+ DidChange = true;
+ PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+ // If this change resulted in PMBB ending in a conditional
+ // branch where both conditions go to the same destination,
+ // change this to an unconditional branch (and fix the CFG).
+ MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
+ SmallVector<MachineOperand, 4> NewCurCond;
+ bool NewCurUnAnalyzable = TII->analyzeBranch(
+ *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);
+ if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
+ TII->RemoveBranch(*PMBB);
+ NewCurCond.clear();
+ TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
+ MadeChange = true;
+ ++NumBranchOpts;
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
+ }
+ }
+ }
+
+ // Change any jumptables to go to the new MBB.
+ if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
+ MJTI->ReplaceMBBInJumpTables(MBB, CurTBB);
+ if (DidChange) {
+ ++NumBranchOpts;
+ MadeChange = true;
+ if (!HasBranchToSelf) return MadeChange;
+ }
+ }
+ }
+
+ // Add the branch back if the block is more than just an uncond branch.
+ TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
+ }
+ }
+
+ // If the prior block doesn't fall through into this block, and if this
+ // block doesn't fall through into some other block, see if we can find a
+ // place to move this block where a fall-through will happen.
+ if (!PrevBB.canFallThrough()) {
+
+ // Now we know that there was no fall-through into this block, check to
+ // see if it has a fall-through into its successor.
+ bool CurFallsThru = MBB->canFallThrough();
+
+ if (!MBB->isEHPad()) {
+ // Check all the predecessors of this block. If one of them has no fall
+ // throughs, move this block right after it.
+ for (MachineBasicBlock *PredBB : MBB->predecessors()) {
+ // Analyze the branch at the end of the pred.
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !PredBB->canFallThrough() &&
+ !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) &&
+ (!CurFallsThru || !CurTBB || !CurFBB) &&
+ (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
+ // If the current block doesn't fall through, just move it.
+ // If the current block can fall through and does not end with a
+ // conditional branch, we need to append an unconditional jump to
+ // the (current) next block. To avoid a possible compile-time
+ // infinite loop, move blocks only backward in this case.
+ // Also, if there are already 2 branches here, we cannot add a third;
+ // this means we have the case
+ // Bcc next
+ // B elsewhere
+ // next:
+ if (CurFallsThru) {
+ MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
+ CurCond.clear();
+ TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
+ }
+ MBB->moveAfter(PredBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+ }
+
+ if (!CurFallsThru) {
+ // Check all successors to see if we can move this block before it.
+ for (MachineBasicBlock *SuccBB : MBB->successors()) {
+ // Analyze the branch at the end of the block before the succ.
+ MachineFunction::iterator SuccPrev = --SuccBB->getIterator();
+
+ // If this block doesn't already fall-through to that successor, and if
+ // the succ doesn't already have a block that can fall through into it,
+ // and if the successor isn't an EH destination, we can arrange for the
+ // fallthrough to happen.
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !SuccPrev->canFallThrough() && !CurUnAnalyzable &&
+ !SuccBB->isEHPad()) {
+ MBB->moveBefore(SuccBB);
+ MadeChange = true;
+ goto ReoptimizeBlock;
+ }
+ }
+
+ // Okay, there is no really great place to put this block. If, however,
+ // the block before this one would be a fall-through if this block were
+ // removed, move this block to the end of the function.
+ MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
+ SmallVector<MachineOperand, 4> PrevCond;
+ // We're looking for cases where PrevBB could possibly fall through to
+ // FallThrough, but if FallThrough is an EH pad that wouldn't be useful, so
+ // skip over any EH pads here to give ourselves a chance to find a branch
+ // target from PrevBB.
+ while (FallThrough != MF.end() && FallThrough->isEHPad())
+ ++FallThrough;
+ // Now check to see if the current block is sitting between PrevBB and
+ // a block to which it could fall through.
+ if (FallThrough != MF.end() &&
+ !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
+ PrevBB.isSuccessor(&*FallThrough)) {
+ MBB->moveAfter(&MF.back());
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+// Hoist Common Code
+//===----------------------------------------------------------------------===//
+
+/// HoistCommonCode - Hoist common instruction sequences at the start of basic
+/// blocks to their common predecessor.
+bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
+ bool MadeChange = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = &*I++;
+ MadeChange |= HoistCommonCodeInSuccs(MBB);
+ }
+
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock *SuccBB : BB->successors())
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ return nullptr;
+}
+
+template <class Container>
+static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
+ Container &Set) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Set.insert(*AI);
+ } else {
+ Set.insert(Reg);
+ }
+}
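// Illustrative sketch (not part of the patch above): addRegAndItsAliases leans
// on MCRegAliasIterator to expand a physical register into every register that
// overlaps it. The standalone toy below performs the same expansion with a
// hand-written alias table; the x86-flavoured register names are only for
// illustration.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Every register maps to the set of registers overlapping it, itself included.
  std::map<std::string, std::vector<std::string>> Aliases = {
      {"AL", {"AL", "AX", "EAX", "RAX"}},
      {"BL", {"BL", "BX", "EBX", "RBX"}},
  };

  std::set<std::string> Uses;
  for (const std::string &R : Aliases["AL"]) // like addRegAndItsAliases(AL, ...)
    Uses.insert(R);

  std::cout << "tracking " << Uses.size() << " overlapping registers\n";
  return 0;
}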
+
+/// findHoistingInsertPosAndDeps - Find the location to move common instructions
+/// in successors to. The location is usually just before the terminator;
+/// however, if the terminator is a conditional branch and its previous
+/// instruction is the flag setting instruction, the previous instruction is
+/// the preferred location. This function also gathers uses and defs of the
+/// instructions from the insertion point to the end of the block. The data is
+/// used by HoistCommonCodeInSuccs to ensure safety.
+static
+MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ SmallSet<unsigned,4> &Uses,
+ SmallSet<unsigned,4> &Defs) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ if (!TII->isUnpredicatedTerminator(*Loc))
+ return MBB->end();
+
+ for (const MachineOperand &MO : Loc->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ addRegAndItsAliases(Reg, TRI, Uses);
+ } else {
+ if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+
+ // If the terminator defines a register, make sure we don't hoist
+ // the instruction whose def might be clobbered by the terminator.
+ addRegAndItsAliases(Reg, TRI, Defs);
+ }
+ }
+
+ if (Uses.empty())
+ return Loc;
+ if (Loc == MBB->begin())
+ return MBB->end();
+
+ // The terminator is probably a conditional branch; try not to separate the
+ // branch from the condition-setting instruction.
+ MachineBasicBlock::iterator PI = Loc;
+ --PI;
+ while (PI != MBB->begin() && PI->isDebugValue())
+ --PI;
+
+ bool IsDef = false;
+ for (const MachineOperand &MO : PI->operands()) {
+ // If PI has a regmask operand, it is probably a call. Separate away.
+ if (MO.isRegMask())
+ return Loc;
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (Uses.count(Reg)) {
+ IsDef = true;
+ break;
+ }
+ }
+ if (!IsDef)
+ // The condition setting instruction is not just before the conditional
+ // branch.
+ return Loc;
+
+ // Be conservative: don't insert an instruction above something that may have
+ // side effects. And since it's potentially bad to separate a flag-setting
+ // instruction from the conditional branch, just abort the optimization
+ // completely.
+ // Also avoid moving code above a predicated instruction, since it's hard to
+ // reason about register liveness with predicated instructions.
+ bool DontMoveAcrossStore = true;
+ if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI))
+ return MBB->end();
+
+
+ // Find out what registers are live. Note this routine is ignoring other live
+ // registers which are only used by instructions in successor blocks.
+ for (const MachineOperand &MO : PI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ addRegAndItsAliases(Reg, TRI, Uses);
+ } else {
+ if (Uses.erase(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ }
+ }
+ addRegAndItsAliases(Reg, TRI, Defs);
+ }
+ }
+
+ return PI;
+}
+
+/// HoistCommonCodeInSuccs - If the successors of MBB have a common instruction
+/// sequence at their start, move those instructions to before the MBB
+/// terminator if it's legal.
+bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
+ return false;
+
+ if (!FBB) FBB = findFalseBlock(MBB, TBB);
+ if (!FBB)
+ // Malformed bcc? True and false blocks are the same?
+ return false;
+
+ // Restrict the optimization to cases where MBB is the only predecessor;
+ // there it is an obvious win.
+ if (TBB->pred_size() > 1 || FBB->pred_size() > 1)
+ return false;
+
+ // Find a suitable position to hoist the common instructions to. Also figure
+ // out which registers are used or defined by instructions from the insertion
+ // point to the end of the block.
+ SmallSet<unsigned, 4> Uses, Defs;
+ MachineBasicBlock::iterator Loc =
+ findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs);
+ if (Loc == MBB->end())
+ return false;
+
+ bool HasDups = false;
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallSet<unsigned, 4> LocalDefsSet;
+ MachineBasicBlock::iterator TIB = TBB->begin();
+ MachineBasicBlock::iterator FIB = FBB->begin();
+ MachineBasicBlock::iterator TIE = TBB->end();
+ MachineBasicBlock::iterator FIE = FBB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))
+ break;
+
+ if (TII->isPredicated(*TIB))
+ // Hard to reason about register liveness with predicated instruction.
+ break;
+
+ bool IsSafe = true;
+ for (MachineOperand &MO : TIB->operands()) {
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(Reg)) {
+ // Avoid clobbering a register that's used by the instruction at
+ // the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (Defs.count(Reg) && !MO.isDead()) {
+ // Don't hoist the instruction if the def would be clobbered by the
+ // instruction at the point of insertion. FIXME: This is overly
+ // conservative. It should be possible to hoist the instructions
+ // in BB2 in the following example:
+ // BB1:
+ // r1, eflag = op1 r2, r3
+ // brcc eflag
+ //
+ // BB2:
+ // r1 = op2, ...
+ // = op3, r1<kill>
+ IsSafe = false;
+ break;
+ }
+ } else if (!LocalDefsSet.count(Reg)) {
+ if (Defs.count(Reg)) {
+ // Use is defined by the instruction at the point of insertion.
+ IsSafe = false;
+ break;
+ }
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
+ }
+ }
+ if (!IsSafe)
+ break;
+
+ bool DontMoveAcrossStore = true;
+ if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore))
+ break;
+
+ // Remove kills from LocalDefsSet; these registers had short live ranges.
+ for (const MachineOperand &MO : TIB->operands()) {
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !LocalDefsSet.count(Reg))
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LocalDefsSet.erase(*AI);
+ } else {
+ LocalDefsSet.erase(Reg);
+ }
+ }
+
+ // Track local defs so we can update liveins.
+ for (const MachineOperand &MO : TIB->operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ LocalDefs.push_back(Reg);
+ addRegAndItsAliases(Reg, TRI, LocalDefsSet);
+ }
+
+ HasDups = true;
+ ++TIB;
+ ++FIB;
+ }
+
+ if (!HasDups)
+ return false;
+
+ MBB->splice(Loc, TBB, TBB->begin(), TIB);
+ FBB->erase(FBB->begin(), FIB);
+
+ // Update live-ins.
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Def = LocalDefs[i];
+ if (LocalDefsSet.count(Def)) {
+ TBB->addLiveIn(Def);
+ FBB->addLiveIn(Def);
+ }
+ }
+
+ ++NumHoist;
+ return true;
+}
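// Illustrative sketch (not part of the patch above): the heart of
// HoistCommonCodeInSuccs is a lockstep walk over the two successor blocks that
// hoists their identical leading instructions into the predecessor. The toy
// below mimics that prefix walk on plain strings and leaves out all of the
// liveness and safety checks; the instruction text is made up.
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> TBB = {"r0 = load [sp+8]", "r1 = add r0, 4", "call foo"};
  std::vector<std::string> FBB = {"r0 = load [sp+8]", "r1 = add r0, 4", "call bar"};
  std::vector<std::string> Pred = {"brcc r2, TBB, FBB"}; // conditional terminator

  // Walk both blocks while their instructions are identical, as the TIB/FIB
  // loop above does.
  size_t N = 0;
  while (N < TBB.size() && N < FBB.size() && TBB[N] == FBB[N])
    ++N;

  // "Hoist": splice the common prefix in front of the predecessor's terminator
  // and erase it from both successors.
  Pred.insert(Pred.end() - 1, TBB.begin(), TBB.begin() + N);
  TBB.erase(TBB.begin(), TBB.begin() + N);
  FBB.erase(FBB.begin(), FBB.begin() + N);

  std::cout << "hoisted " << N << " instruction(s)\n";
  return 0;
}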
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
new file mode 100644
index 000000000000..36a5a2e2c97c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -0,0 +1,160 @@
+//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/BlockFrequency.h"
+#include <vector>
+
+namespace llvm {
+ class MachineBlockFrequencyInfo;
+ class MachineBranchProbabilityInfo;
+ class MachineFunction;
+ class MachineModuleInfo;
+ class MachineLoopInfo;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+
+ class LLVM_LIBRARY_VISIBILITY BranchFolder {
+ public:
+ class MBFIWrapper;
+
+ explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ MBFIWrapper &MBFI,
+ const MachineBranchProbabilityInfo &MBPI);
+
+ bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
+ MachineLoopInfo *mli = nullptr,
+ bool AfterPlacement = false);
+
+ private:
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
+ typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+ std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership;
+
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
+ std::vector<SameTailElt> SameTails;
+
+ bool AfterBlockPlacement;
+ bool EnableTailMerge;
+ bool EnableHoistCommonCode;
+ bool UpdateLiveIns;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineModuleInfo *MMI;
+ MachineLoopInfo *MLI;
+ LivePhysRegs LiveRegs;
+
+ public:
+ /// \brief This class keeps track of branch frequencies of newly created
+ /// blocks and tail-merged blocks.
+ class MBFIWrapper {
+ public:
+ MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {}
+ BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
+ void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F);
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const;
+ raw_ostream &printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const;
+
+ private:
+ const MachineBlockFrequencyInfo &MBFI;
+ DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq;
+ };
+
+ private:
+ MBFIWrapper &MBBFreqInfo;
+ const MachineBranchProbabilityInfo &MBPI;
+
+ bool TailMergeBlocks(MachineFunction &MF);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
+ void computeLiveIns(MachineBasicBlock &MBB);
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+ MachineBasicBlock *NewDest);
+ MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+ MachineBasicBlock::iterator BBI1,
+ const BasicBlock *BB);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
+ void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ MachineBasicBlock *SuccBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
+
+ bool OptimizeBranches(MachineFunction &MF);
+ bool OptimizeBlock(MachineBasicBlock *MBB);
+ void RemoveDeadBlock(MachineBasicBlock *MBB);
+ bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+ bool HoistCommonCode(MachineFunction &MF);
+ bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
+ };
+}
+
+#endif /* LLVM_LIB_CODEGEN_BRANCHFOLDING_H */
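// Illustrative sketch (not from the LLVM sources): how a machine-function pass
// inside lib/CodeGen might drive BranchFolder, going only by the constructor
// and OptimizeFunction signatures declared above. The analysis objects are
// assumed to be available from the surrounding pass; adjust as needed.
#include "BranchFolding.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

static bool runBranchFolding(MachineFunction &MF,
                             const MachineBlockFrequencyInfo &MBFI,
                             const MachineBranchProbabilityInfo &MBPI,
                             MachineModuleInfo *MMI) {
  BranchFolder::MBFIWrapper FreqInfo(MBFI);
  // Enable tail merging and common-code hoisting with default heuristics.
  BranchFolder Folder(/*defaultEnableTailMerge=*/true, /*CommonHoist=*/true,
                      FreqInfo, MBPI);
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  return Folder.OptimizeFunction(MF, STI.getInstrInfo(), STI.getRegisterInfo(),
                                 MMI);
}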
diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
new file mode 100644
index 000000000000..ff7c99de0420
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp
@@ -0,0 +1,139 @@
+//===-- BuiltinGCs.cpp - Boilerplate for our built in GC types --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the boilerplate required to define our various built in
+// gc lowering strategies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+
+/// An example GC which attempts to be compatible with the Erlang/OTP garbage
+/// collector.
+///
+/// The frametable emitter is in ErlangGCPrinter.cpp.
+class ErlangGC : public GCStrategy {
+public:
+ ErlangGC() {
+ InitRoots = false;
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ CustomRoots = false;
+ }
+};
+
+/// An example GC which attempts to be compatible with Objective Caml 3.10.0
+///
+/// The frametable emitter is in OcamlGCPrinter.cpp.
+class OcamlGC : public GCStrategy {
+public:
+ OcamlGC() {
+ NeededSafePoints = 1 << GC::PostCall;
+ UsesMetadata = true;
+ }
+};
+
+/// A GC strategy for uncooperative targets. This implements lowering for the
+/// llvm.gc* intrinsics for targets that do not natively support them (which
+/// includes the C backend). Note that the code generated is not quite as
+/// efficient as algorithms which generate stack maps to identify roots.
+///
+/// In order to support this particular transformation, all stack roots are
+/// co-allocated on the stack. This allows a fully target-independent stack map
+/// while introducing only minor runtime overhead.
+class ShadowStackGC : public GCStrategy {
+public:
+ ShadowStackGC() {
+ InitRoots = true;
+ CustomRoots = true;
+ }
+};
+
+/// A GCStrategy which serves as an example for the usage of a statepoint based
+/// lowering strategy. This GCStrategy is intended to be suitable as a default
+/// implementation usable with any collector which can consume the standard
+/// stackmap format generated by statepoints, uses the default addrspace to
+/// distinguish between gc managed and non-gc managed pointers, and has
+/// reasonable relocation semantics.
+class StatepointGC : public GCStrategy {
+public:
+ StatepointGC() {
+ UseStatepoints = true;
+ // These options are all gc.root specific, we specify them so that the
+ // gc.root lowering code doesn't run.
+ InitRoots = false;
+ NeededSafePoints = 0;
+ UsesMetadata = false;
+ CustomRoots = false;
+ }
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
+ // Method is only valid on pointer typed values.
+ const PointerType *PT = cast<PointerType>(Ty);
+ // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
+ // GC managed heap. We know that a pointer into this heap needs to be
+ // updated and that no other pointer does. Note that addrspace(1) is used
+ // only as an example, it has no special meaning, and is not reserved for
+ // GC usage.
+ return (1 == PT->getAddressSpace());
+ }
+};
+
+/// A GCStrategy for the CoreCLR Runtime. The strategy is similar to
+/// Statepoint-example GC, but differs from it in certain aspects, such as:
+/// 1) Base-pointers need not be explicitly tracked and reported for
+/// interior pointers
+/// 2) Uses a different format for encoding stack-maps
+/// 3) Location of Safe-point polls: polls are only needed before loop-back
+/// edges and before tail-calls (not needed at function-entry)
+///
+/// The above differences in behavior are to be implemented in upcoming
+/// checkins.
+class CoreCLRGC : public GCStrategy {
+public:
+ CoreCLRGC() {
+ UseStatepoints = true;
+ // These options are all gc.root specific, we specify them so that the
+ // gc.root lowering code doesn't run.
+ InitRoots = false;
+ NeededSafePoints = 0;
+ UsesMetadata = false;
+ CustomRoots = false;
+ }
+ Optional<bool> isGCManagedPointer(const Type *Ty) const override {
+ // Method is only valid on pointer typed values.
+ const PointerType *PT = cast<PointerType>(Ty);
+ // We pick addrspace(1) as our GC managed heap.
+ return (1 == PT->getAddressSpace());
+ }
+};
+}
+
+// Register all the above so that they can be found at runtime. Note that
+// these static initializers are important since the registration list is
+// constructed from their storage.
+static GCRegistry::Add<ErlangGC> A("erlang",
+ "erlang-compatible garbage collector");
+static GCRegistry::Add<OcamlGC> B("ocaml", "ocaml 3.10-compatible GC");
+static GCRegistry::Add<ShadowStackGC>
+ C("shadow-stack", "Very portable GC for uncooperative code generators");
+static GCRegistry::Add<StatepointGC> D("statepoint-example",
+ "an example strategy for statepoint");
+static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC");
+
+// Provide hooks to ensure the containing library is fully loaded.
+void llvm::linkErlangGC() {}
+void llvm::linkOcamlGC() {}
+void llvm::linkShadowStackGC() {}
+void llvm::linkStatepointExampleGC() {}
+void llvm::linkCoreCLRGC() {}
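// Illustrative sketch (not part of the file above): an out-of-tree collector
// can hook into the same registry with the pattern used by the built-in
// strategies. "MyGC" and the "mygc" name are made up for the example.
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCs.h"
using namespace llvm;

namespace {
class MyGC : public GCStrategy {
public:
  MyGC() {
    UseStatepoints = true;
    // Keep the gc.root lowering machinery switched off, as the
    // statepoint-based strategies above do.
    InitRoots = false;
    NeededSafePoints = 0;
    UsesMetadata = false;
    CustomRoots = false;
  }
};
}

// The static initializer is what places the strategy on the registry list, so
// this must live in an object file that actually gets linked into the tool.
static GCRegistry::Add<MyGC> X("mygc", "example user-defined GC strategy");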
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 000000000000..dc2d38a95f99
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,236 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "calcspillweights"
+
+void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
+ MachineFunction &MF,
+ VirtRegMap *VRM,
+ const MachineLoopInfo &MLI,
+ const MachineBlockFrequencyInfo &MBFI,
+ VirtRegAuxInfo::NormalizingFn norm) {
+ DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg));
+ }
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
+ }
+
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+// Check if all values in LI are rematerializable
+static bool isRematerializable(const LiveInterval &LI,
+ const LiveIntervals &LIS,
+ VirtRegMap *VRM,
+ const TargetInstrInfo &TII) {
+ unsigned Reg = LI.reg;
+ unsigned Original = VRM ? VRM->getOriginal(Reg) : 0;
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ if (VNI->isPHIDef())
+ return false;
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+
+ // Trace copies introduced by live range splitting. The inline
+ // spiller can rematerialize through these copies, so the spill
+ // weight must reflect this.
+ if (VRM) {
+ while (MI->isFullCopy()) {
+ // The copy destination must match the interval register.
+ if (MI->getOperand(0).getReg() != Reg)
+ return false;
+
+ // Get the source register.
+ Reg = MI->getOperand(1).getReg();
+
+ // If the original (pre-splitting) registers match, this
+ // copy came from a split.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ VRM->getOriginal(Reg) != Original)
+ return false;
+
+ // Follow the copy live-in value.
+ const LiveInterval &SrcLI = LIS.getInterval(Reg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ VNI = SrcQ.valueIn();
+ assert(VNI && "Copy from non-existing value");
+ if (VNI->isPHIDef())
+ return false;
+ MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
+ }
+ }
+
+ if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis()))
+ return false;
+ }
+ return true;
+}
+
+void
+VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = MF.getRegInfo();
+ const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
+ MachineBasicBlock *mbb = nullptr;
+ MachineLoop *loop = nullptr;
+ bool isExiting = false;
+ float totalWeight = 0;
+ unsigned numInstr = 0; // Number of instructions using li
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hint and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ // Don't recompute spill weight for an unspillable register.
+ bool Spillable = li.isSpillable();
+
+ for (MachineRegisterInfo::reg_instr_iterator
+ I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
+ I != E; ) {
+ MachineInstr *mi = &*(I++);
+ numInstr++;
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi).second)
+ continue;
+
+ float weight = 1.0f;
+ if (Spillable) {
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = Loops.getLoopFor(mbb);
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+ }
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ // Force hweight onto the stack so that x86 doesn't add hidden precision,
+ // making the comparison incorrectly pass (i.e., 1 > 1 == true??).
+ //
+ // FIXME: we probably shouldn't use floats at all.
+ volatile float hweight = Hint[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && mri.isAllocatable(hint)) {
+ bestPhys = hweight;
+ hintPhys = hint;
+ }
+ } else {
+ if (hweight > bestVirt) {
+ bestVirt = hweight;
+ hintVirt = hint;
+ }
+ }
+ }
+
+ Hint.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weight of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // If the live interval was already unspillable, leave it that way.
+ if (!Spillable)
+ return;
+
+ // Mark li as unspillable if all live ranges are tiny and the interval
+ // is not live at any reg mask. If the interval is live at a reg mask
+ // spilling may be required.
+ if (li.isZeroLength(LIS.getSlotIndexes()) &&
+ !li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
+ totalWeight *= 0.5F;
+
+ li.weight = normalize(totalWeight, li.getSize(), numInstr);
+}
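// Illustrative sketch (not part of the file above): a standalone toy that
// applies the same scaling factors calculateSpillWeightAndHint uses, just to
// show how they compose. The per-use base weight stands in for
// LiveIntervals::getSpillWeight, and the final value would normally go through
// the normalize() hook as well.
#include <cstdio>

int main() {
  float totalWeight = 0;
  // Three uses with a made-up base weight of 1.0f; one of them looks like a
  // loop induction variable update, so it is weighted three times as heavily.
  totalWeight += 1.0f;
  totalWeight += 1.0f * 3;
  totalWeight += 1.0f;

  totalWeight *= 1.01F; // a preferred-register hint was recorded
  totalWeight *= 0.5F;  // every definition is trivially re-materializable

  std::printf("toy spill weight before normalization: %f\n", totalWeight);
  return 0;
}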
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 000000000000..7d67bcfe5469
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,287 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
+ : CallingConv(CC), IsVarArg(isVarArg), MF(mf),
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C),
+ CallOrPrologue(Unknown) {
+ // No stack is used.
+ StackOffset = 0;
+ MaxStackArgAlign = 1;
+
+ clearByValRegsInfo();
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+/// Allocate space on the stack large enough to pass an argument by value.
+/// The size and alignment information of the argument is encoded in
+/// its parameter attribute.
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ ensureMaxAlignment(Align);
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
+ Size = unsigned(alignTo(Size, MinAlign));
+ unsigned Offset = AllocateStack(Size, Align);
+ addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+}
+
+/// Mark a register and all of its aliases as allocated.
+void CCState::MarkAllocated(unsigned Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI/32] |= 1 << (*AI&31);
+}
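// Illustrative sketch (not part of the file above): the UsedRegs bookkeeping
// packs one "allocated" bit per register into 32-bit words, which is what the
// (NumRegs + 31) / 32 sizing and the Reg / 32, Reg & 31 arithmetic above are
// doing. The same scheme on a plain std::vector, with a made-up register count:
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const unsigned NumRegs = 100; // stand-in for TRI.getNumRegs()
  std::vector<uint32_t> UsedRegs((NumRegs + 31) / 32);

  auto markAllocated = [&](unsigned Reg) {
    UsedRegs[Reg / 32] |= 1u << (Reg & 31);
  };
  auto isAllocated = [&](unsigned Reg) {
    return (UsedRegs[Reg / 32] & (1u << (Reg & 31))) != 0;
  };

  markAllocated(37);
  assert(isAllocated(37) && !isAllocated(38));
  return 0;
}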
+
+/// Analyze an array of argument values,
+/// incorporating info about the formals into this state.
+void
+CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ unsigned NumArgs = Ins.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Analyze the return values of a function, returning true if the return can
+/// be performed without sret-demotion and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
+/// Analyze the returned values of a return,
+/// incorporating info about the result values into this state.
+void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ MVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Analyze the outgoing arguments to a call,
+/// incorporating info about the passed values into this state.
+void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Same as above except it takes vectors of types and argument flags.
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Analyze the return values of a call, incorporating info about the passed
+/// values into this state.
+void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ MVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+}
+
+/// Same as above except it's specialized for calls that produce a single value.
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call result has unhandled type "
+ << EVT(VT).getEVTString() << '\n';
+#endif
+ llvm_unreachable(nullptr);
+ }
+}
+
+static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
+ if (VT.isVector())
+ return true; // Assume -msse-regparm might be in effect.
+ if (!VT.isInteger())
+ return false;
+ if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall)
+ return true;
+ return false;
+}
+
+void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
+ MVT VT, CCAssignFn Fn) {
+ unsigned SavedStackOffset = StackOffset;
+ unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
+ unsigned NumLocs = Locs.size();
+
+ // Set the 'inreg' flag if it is used for this calling convention.
+ ISD::ArgFlagsTy Flags;
+ if (isValueTypeInRegForCC(CallingConv, VT))
+ Flags.setInReg();
+
+ // Allocate something of this value type repeatedly until we get assigned a
+ // location in memory.
+ bool HaveRegParm = true;
+ while (HaveRegParm) {
+ if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+ << " while computing remaining regparms\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ HaveRegParm = Locs.back().isRegLoc();
+ }
+
+ // Copy all the registers from the value locations we added.
+ assert(NumLocs < Locs.size() && "CC assignment failed to add location");
+ for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
+ if (Locs[I].isRegLoc())
+ Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
+
+ // Clear the assigned values and stack memory. We leave the registers marked
+ // as allocated so that future queries don't return the same registers, i.e.
+ // when i64 and f64 are both passed in GPRs.
+ StackOffset = SavedStackOffset;
+ MaxStackArgAlign = SavedMaxStackArgAlign;
+ Locs.resize(NumLocs);
+}
+
+void CCState::analyzeMustTailForwardedRegisters(
+ SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+ CCAssignFn Fn) {
+ // Oftentimes calling conventions will not use register parameters for
+ // variadic functions, so we need to assume we're not variadic so that we get
+ // all the registers that might be used in a non-variadic call.
+ SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+ SaveAndRestore<bool> SavedMustTail(AnalyzingMustTailForwardedRegs, true);
+
+ for (MVT RegVT : RegParmTypes) {
+ SmallVector<MCPhysReg, 8> RemainingRegs;
+ getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
+ const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
+ const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
+ for (MCPhysReg PReg : RemainingRegs) {
+ unsigned VReg = MF.addLiveIn(PReg, RC);
+ Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
+ }
+ }
+}
+
+bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, MachineFunction &MF,
+ LLVMContext &C,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn CalleeFn, CCAssignFn CallerFn) {
+ if (CalleeCC == CallerCC)
+ return true;
+ SmallVector<CCValAssign, 4> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, MF, RVLocs1, C);
+ CCInfo1.AnalyzeCallResult(Ins, CalleeFn);
+
+ SmallVector<CCValAssign, 4> RVLocs2;
+ CCState CCInfo2(CallerCC, false, MF, RVLocs2, C);
+ CCInfo2.AnalyzeCallResult(Ins, CallerFn);
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) {
+ const CCValAssign &Loc1 = RVLocs1[I];
+ const CCValAssign &Loc2 = RVLocs2[I];
+ if (Loc1.getLocInfo() != Loc2.getLocInfo())
+ return false;
+ bool RegLoc1 = Loc1.isRegLoc();
+ if (RegLoc1 != Loc2.isRegLoc())
+ return false;
+ if (RegLoc1) {
+ if (Loc1.getLocReg() != Loc2.getLocReg())
+ return false;
+ } else {
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
+ return false;
+ }
+ }
+ return true;
+}
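// Illustrative sketch (not part of the file above): the usual lowering-time
// pattern for the CCState entry points defined in this file. CC_MyTarget is a
// hypothetical stand-in for a target's TableGen-generated CCAssignFn; the
// helper itself is not from LLVM.
#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Hypothetical assignment function, normally emitted into *GenCallingConv.inc.
bool CC_MyTarget(unsigned ValNo, MVT ValVT, MVT LocVT,
                 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                 CCState &State);

static void lowerCallOperands(MachineFunction &MF, CallingConv::ID CC,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              LLVMContext &Ctx) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
  CCInfo.AnalyzeCallOperands(Outs, CC_MyTarget);

  // Each location now says whether the operand lives in a register or at a
  // byte offset in the outgoing argument area.
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc())
      (void)VA.getLocReg();
    else
      (void)VA.getLocMemOffset();
  }
}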
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..6679819fdef6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,94 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeAtomicExpandPass(Registry);
+ initializeBranchFolderPassPass(Registry);
+ initializeCodeGenPreparePass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeDetectDeadLanesPass(Registry);
+ initializeDwarfEHPreparePass(Registry);
+ initializeEarlyIfConverterPass(Registry);
+ initializeExpandISelPseudosPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeFuncletLayoutPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeInterleavedAccessPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+ initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeImplicitNullChecksPass(Registry);
+ initializeMachineCombinerPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineSchedulerPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeXRayInstrumentationPass(Registry);
+ initializePatchableFunctionPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePEIPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializePostMachineSchedulerPass(Registry);
+ initializePostRAHazardRecognizerPass(Registry);
+ initializePostRASchedulerPass(Registry);
+ initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializeRegisterCoalescerPass(Registry);
+ initializeRenameIndependentSubregsPass(Registry);
+ initializeShrinkWrapPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeStackColoringPass(Registry);
+ initializeStackMapLivenessPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
+ initializeSafeStackPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeTailDuplicatePassPass(Registry);
+ initializeTargetPassConfigPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
+ initializeUnreachableBlockElimLegacyPassPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeVirtRegRewriterPass(Registry);
+ initializeWinEHPreparePass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
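// Illustrative sketch (not part of the file above): a tool linking the CodeGen
// library typically runs this registration once, before building its pass
// pipeline, either through the C API entry point above or directly from C++:
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"

int main() {
  llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry();
  llvm::initializeCodeGen(Registry);
  // ...set up a TargetMachine and populate a pass manager here...
  return 0;
}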
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
new file mode 100644
index 000000000000..ede404149a1c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -0,0 +1,5656 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "codegenprepare"
+
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+ "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+ "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+ "computations were sunk");
+STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumAndsAdded,
+ "Number of and mask instructions added to form ext loads");
+STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
+STATISTIC(NumRetsDup, "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
+STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
+STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
+STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
+
+static cl::opt<bool> DisableBranchOpts(
+ "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable branch optimizations in CodeGenPrepare"));
+
+static cl::opt<bool>
+ DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
+ cl::desc("Disable GC optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> DisableSelectToBranch(
+ "disable-cgp-select2branch", cl::Hidden, cl::init(false),
+ cl::desc("Disable select to branch conversion."));
+
+static cl::opt<bool> AddrSinkUsingGEPs(
+ "addr-sink-using-gep", cl::Hidden, cl::init(false),
+ cl::desc("Address sinking in CGP using GEPs."));
+
+static cl::opt<bool> EnableAndCmpSinking(
+ "enable-andcmp-sinking", cl::Hidden, cl::init(true),
+ cl::desc("Enable sinkinig and/cmp into branches."));
+
+static cl::opt<bool> DisableStoreExtract(
+ "disable-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> StressStoreExtract(
+ "stress-cgp-store-extract", cl::Hidden, cl::init(false),
+ cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> DisableExtLdPromotion(
+ "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
+ "CodeGenPrepare"));
+
+static cl::opt<bool> StressExtLdPromotion(
+ "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
+ "optimization in CodeGenPrepare"));
+
+static cl::opt<bool> DisablePreheaderProtect(
+ "disable-preheader-prot", cl::Hidden, cl::init(false),
+ cl::desc("Disable protection against removing loop preheaders"));
+
+namespace {
+typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
+typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
+typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+class TypePromotionTransaction;
+
+ class CodeGenPrepare : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLowering *TLI;
+ const TargetTransformInfo *TTI;
+ const TargetLibraryInfo *TLInfo;
+ const LoopInfo *LI;
+
+ /// As we scan instructions optimizing them, this is the next instruction
+ /// to optimize. Transforms that can invalidate this should update it.
+ BasicBlock::iterator CurInstIterator;
+
+ /// Keeps track of non-local addresses that have been sunk into a block.
+ /// This allows us to avoid inserting duplicate code for blocks with
+ /// multiple load/stores of the same address.
+ ValueMap<Value*, Value*> SunkAddrs;
+
+ /// Keeps track of all instructions inserted for the current function.
+ SetOfInstrs InsertedInsts;
+ /// Keeps track of the type each instruction had before its promotion,
+ /// for the current function.
+ InstrToOrigTy PromotedInsts;
+
+ /// True if CFG is modified in any way.
+ bool ModifiedDT;
+
+ /// True if optimizing for size.
+ bool OptSize;
+
+ /// DataLayout for the Function being processed.
+ const DataLayout *DL;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) {
+ initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ const char *getPassName() const override { return "CodeGen Prepare"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // FIXME: When we can selectively preserve passes, preserve the domtree.
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+
+ private:
+ bool eliminateFallThrough(Function &F);
+ bool eliminateMostlyEmptyBlocks(Function &F);
+ bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+ void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
+ bool optimizeInst(Instruction *I, bool& ModifiedDT);
+ bool optimizeMemoryInst(Instruction *I, Value *Addr,
+ Type *AccessTy, unsigned AS);
+ bool optimizeInlineAsmInst(CallInst *CS);
+ bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
+ bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExtUses(Instruction *I);
+ bool optimizeLoadExt(LoadInst *I);
+ bool optimizeSelectInst(SelectInst *SI);
+ bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ bool optimizeSwitchInst(SwitchInst *CI);
+ bool optimizeExtractElementInst(Instruction *Inst);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool placeDbgValues(Function &F);
+ bool sinkAndCmp(Function &F);
+ bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
+ Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInstCost);
+ bool splitBranchCondition(Function &F);
+ bool simplifyOffsetableRelocate(Instruction &I);
+ void stripInvariantGroupMetadata(Instruction &I);
+ };
+}
+
+char CodeGenPrepare::ID = 0;
+INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
+ return new CodeGenPrepare(TM);
+}
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ DL = &F.getParent()->getDataLayout();
+
+ bool EverMadeChange = false;
+ // Clear per function information.
+ InsertedInsts.clear();
+ PromotedInsts.clear();
+
+ ModifiedDT = false;
+ if (TM)
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ OptSize = F.optForSize();
+
+ /// This optimization identifies DIV instructions that can be
+ /// profitably bypassed and carried out with a shorter, faster divide.
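+ ///
+ /// For illustration (types and values are hypothetical, assuming a target
+ /// with slow 64-bit but fast 32-bit division), bypassSlowDivision rewrites
+ ///   %d = udiv i64 %a, %b
+ /// into a cheap runtime check on the high bits of %a and %b that selects
+ /// between the original 64-bit udiv and a truncated 32-bit udiv whose
+ /// result is zero-extended back to i64.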
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+ const DenseMap<unsigned int, unsigned int> &BypassWidths =
+ TLI->getBypassSlowDivWidths();
+ BasicBlock* BB = &*F.begin();
+ while (BB != nullptr) {
+ // bypassSlowDivision may create new BBs, but we don't want to reapply the
+ // optimization to those blocks.
+ BasicBlock* Next = BB->getNextNode();
+ EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
+ BB = Next;
+ }
+ }
+
+ // Eliminate blocks that contain only PHI nodes and an
+ // unconditional branch.
+ EverMadeChange |= eliminateMostlyEmptyBlocks(F);
+
+ // If llvm.dbg.value is far away from the value, then iSel may not be able to
+ // handle it properly. iSel will drop llvm.dbg.value if it cannot
+ // find a node corresponding to the value.
+ EverMadeChange |= placeDbgValues(F);
+
+ // If there is a mask, compare against zero, and branch that can be combined
+ // into a single target instruction, push the mask and compare into branch
+ // users. Do this before OptimizeBlock -> OptimizeInst ->
+ // OptimizeCmpExpression, which perturbs the pattern being searched for.
+ if (!DisableBranchOpts) {
+ EverMadeChange |= sinkAndCmp(F);
+ EverMadeChange |= splitBranchCondition(F);
+ }
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(); I != F.end(); ) {
+ BasicBlock *BB = &*I++;
+ bool ModifiedDTOnIteration = false;
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+
+ // Restart BB iteration if the dominator tree of the Function was changed
+ if (ModifiedDTOnIteration)
+ break;
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ SunkAddrs.clear();
+
+ if (!DisableBranchOpts) {
+ MadeChange = false;
+ SmallPtrSet<BasicBlock*, 8> WorkList;
+ for (BasicBlock &BB : F) {
+ SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
+ MadeChange |= ConstantFoldTerminator(&BB, true);
+ if (!MadeChange) continue;
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ // Delete the dead blocks and any of their dead successors.
+ MadeChange |= !WorkList.empty();
+ while (!WorkList.empty()) {
+ BasicBlock *BB = *WorkList.begin();
+ WorkList.erase(BB);
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+
+ DeleteDeadBlock(BB);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= eliminateFallThrough(F);
+
+ EverMadeChange |= MadeChange;
+ }
+
+ if (!DisableGCOpts) {
+ SmallVector<Instruction *, 2> Statepoints;
+ for (BasicBlock &BB : F)
+ for (Instruction &I : BB)
+ if (isStatepoint(I))
+ Statepoints.push_back(&I);
+ for (auto &I : Statepoints)
+ EverMadeChange |= simplifyOffsetableRelocate(*I);
+ }
+
+ return EverMadeChange;
+}
+
+/// Merge basic blocks which are connected by a single edge, where one of the
+/// basic blocks has a single successor pointing to the other basic block,
+/// which has a single predecessor.
+bool CodeGenPrepare::eliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
+ BasicBlock *BB = &*I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, nullptr);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB->getIterator();
+ }
+ }
+ return Changed;
+}
+
+/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
+/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
+/// edges in ways that are non-optimal for isel. Start by eliminating these
+/// blocks so we can split them the way we want them.
+bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
+ SmallPtrSet<BasicBlock *, 16> Preheaders;
+ SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
+ while (!LoopList.empty()) {
+ Loop *L = LoopList.pop_back_val();
+ LoopList.insert(LoopList.end(), L->begin(), L->end());
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ Preheaders.insert(Preheader);
+ }
+
+ bool MadeChange = false;
+ // Note that this intentionally skips the entry block.
+ for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
+ BasicBlock *BB = &*I++;
+
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ continue;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI->getIterator();
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ continue;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ continue;
+
+ if (!canMergeBlocks(BB, DestBB))
+ continue;
+
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && Preheaders.count(BB) &&
+ !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
+ continue;
+
+ eliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+/// Return true if we can merge BB into DestBB if there is a single
+/// unconditional branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
+ const BasicBlock *DestBB) const {
+ // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+ // the successor. If there are more complex conditions (e.g. preheaders),
+ // don't mess around with them.
+ BasicBlock::const_iterator BBI = BB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ for (const User *U : PN->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != DestBB || !isa<PHINode>(UI))
+ return false;
+ // If User is inside DestBB block and it is a PHINode then check
+ // incoming value. If incoming value is not from BB then this is
+ // a complex condition (e.g. preheaders) we want to avoid here.
+ if (UI->getParent() == DestBB) {
+ if (const PHINode *UPN = dyn_cast<PHINode>(UI))
+ for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+ Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+ if (Insn && Insn->getParent() == BB &&
+ Insn->getParent() != UPN->getIncomingBlock(I))
+ return false;
+ }
+ }
+ }
+ }
+
+ // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+ // and DestBB may have conflicting incoming values for the block. If so, we
+ // can't merge the block.
+ const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+ if (!DestBBPN) return true; // no conflict.
+
+ // Collect the preds of BB.
+ SmallPtrSet<const BasicBlock*, 16> BBPreds;
+ if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ // It is faster to get preds from a PHI than with pred_iterator.
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ BBPreds.insert(BBPN->getIncomingBlock(i));
+ } else {
+ BBPreds.insert(pred_begin(BB), pred_end(BB));
+ }
+
+ // Walk the preds of DestBB.
+ for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+ if (BBPreds.count(Pred)) { // Common predecessor?
+ BBI = DestBB->begin();
+ while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+ const Value *V1 = PN->getIncomingValueForBlock(Pred);
+ const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+ // If V2 is a phi node in BB, look up what the mapped value will be.
+ if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+ if (V2PN->getParent() == BB)
+ V2 = V2PN->getIncomingValueForBlock(Pred);
+
+ // If there is a conflict, bail out.
+ if (V1 != V2) return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+
+/// Eliminate a basic block that has only phi's and an unconditional branch in
+/// it.
+void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
+ BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+ BasicBlock *DestBB = BI->getSuccessor(0);
+
+ DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+
+ // If the destination block has a single pred, then this is a trivial edge,
+ // just collapse it.
+ if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+ if (SinglePred != DestBB) {
+ // Remember if SinglePred was the entry block of the function. If so, we
+ // will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+ return;
+ }
+ }
+
+ // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
+ // to handle the new incoming edges it is about to have.
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = DestBB->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ // Remove the incoming value for BB, and remember it.
+ Value *InVal = PN->removeIncomingValue(BB, false);
+
+ // Two options: either the InVal is a phi node defined in BB or it is some
+ // value that dominates BB.
+ PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+ if (InValPhi && InValPhi->getParent() == BB) {
+ // Add all of the input values of the input PHI as inputs of this phi.
+ for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InValPhi->getIncomingValue(i),
+ InValPhi->getIncomingBlock(i));
+ } else {
+ // Otherwise, add one instance of the dominating value for each edge that
+ // we will be adding.
+ if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+ PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ PN->addIncoming(InVal, *PI);
+ }
+ }
+ }
+
+ // The PHIs are now updated, change everything that refers to BB to use
+ // DestBB and remove BB.
+ BB->replaceAllUsesWith(DestBB);
+ BB->eraseFromParent();
+ ++NumBlocksElim;
+
+ DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+// Computes a map of base pointer relocation instructions to corresponding
+// derived pointer relocation instructions given a vector of all relocate calls
+static void computeBaseDerivedRelocateMap(
+ const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
+ DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
+ &RelocateInstMap) {
+ // Collect information in two maps: one primarily for locating the base object
+ // while filling the second map; the second map is the final structure holding
+ // a mapping between Base and corresponding Derived relocate calls
+ DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
+ for (auto *ThisRelocate : AllRelocateCalls) {
+ auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
+ ThisRelocate->getDerivedPtrIndex());
+ RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
+ }
+ for (auto &Item : RelocateIdxMap) {
+ std::pair<unsigned, unsigned> Key = Item.first;
+ if (Key.first == Key.second)
+ // Base relocation: nothing to insert
+ continue;
+
+ GCRelocateInst *I = Item.second;
+ auto BaseKey = std::make_pair(Key.first, Key.first);
+
+ // We're iterating over RelocateIdxMap so we cannot modify it.
+ auto MaybeBase = RelocateIdxMap.find(BaseKey);
+ if (MaybeBase == RelocateIdxMap.end())
+ // TODO: We might want to insert a new base object relocate and gep off
+ // that, if there are enough derived object relocates.
+ continue;
+
+ RelocateInstMap[MaybeBase->second].push_back(I);
+ }
+}
+
+// Accepts a GEP and extracts the operands into a vector provided they're all
+// small integer constants
+static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
+ SmallVectorImpl<Value *> &OffsetV) {
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ // Only accept small constant integer operands
+ auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!Op || Op->getZExtValue() > 20)
+ return false;
+ }
+
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++)
+ OffsetV.push_back(GEP->getOperand(i));
+ return true;
+}
+
+// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
+// replace, computes a replacement, and applies it.
+static bool
+simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
+ const SmallVectorImpl<GCRelocateInst *> &Targets) {
+ bool MadeChange = false;
+ for (GCRelocateInst *ToReplace : Targets) {
+ assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
+ "Not relocating a derived object of the original base object");
+ if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
+ // A duplicate relocate call. TODO: coalesce duplicates.
+ continue;
+ }
+
+ if (RelocatedBase->getParent() != ToReplace->getParent()) {
+ // Base and derived relocates are in different basic blocks.
+ // In this case transform is only valid when base dominates derived
+ // relocate. However it would be too expensive to check dominance
+ // for each such relocate, so we skip the whole transformation.
+ continue;
+ }
+
+ Value *Base = ToReplace->getBasePtr();
+ auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
+ if (!Derived || Derived->getPointerOperand() != Base)
+ continue;
+
+ SmallVector<Value *, 2> OffsetV;
+ if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
+ continue;
+
+ // Create a Builder and replace the target callsite with a gep
+ assert(RelocatedBase->getNextNode() &&
+ "Should always have one since it's not a terminator");
+
+ // Insert after RelocatedBase
+ IRBuilder<> Builder(RelocatedBase->getNextNode());
+ Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
+
+ // If gc_relocate does not match the actual type, cast it to the right type.
+ // In theory, there must be a bitcast after gc_relocate if the type does not
+ // match, and we should reuse it to get the derived pointer. But it could be
+ // cases like this:
+ // bb1:
+ // ...
+ // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // bb2:
+ // ...
+ // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
+ // br label %merge
+ //
+ // merge:
+ // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
+ // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
+ //
+ // In this case, we cannot find the bitcast anymore, so we insert a new bitcast
+ // whether or not one already exists. This way we handle all cases, and
+ // the extra bitcast should be optimized away in later passes.
+ Value *ActualRelocatedBase = RelocatedBase;
+ if (RelocatedBase->getType() != Base->getType()) {
+ ActualRelocatedBase =
+ Builder.CreateBitCast(RelocatedBase, Base->getType());
+ }
+ Value *Replacement = Builder.CreateGEP(
+ Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
+ Replacement->takeName(ToReplace);
+ // If the newly generated derived pointer's type does not match the original derived
+ // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
+ Value *ActualReplacement = Replacement;
+ if (Replacement->getType() != ToReplace->getType()) {
+ ActualReplacement =
+ Builder.CreateBitCast(Replacement, ToReplace->getType());
+ }
+ ToReplace->replaceAllUsesWith(ActualReplacement);
+ ToReplace->eraseFromParent();
+
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+// Turns this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = relocate(%tok, i32 4, i32 4)
+// %ptr' = relocate(%tok, i32 4, i32 5)
+// %val = load %ptr'
+//
+// into this:
+//
+// %base = ...
+// %ptr = gep %base + 15
+// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
+// %base' = gc.relocate(%tok, i32 4, i32 4)
+// %ptr' = gep %base' + 15
+// %val = load %ptr'
+bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
+ bool MadeChange = false;
+ SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
+
+ for (auto *U : I.users())
+ if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
+ // Collect all the relocate calls associated with a statepoint
+ AllRelocateCalls.push_back(Relocate);
+
+ // We need at least one base pointer relocation + one derived pointer
+ // relocation to transform.
+ if (AllRelocateCalls.size() < 2)
+ return false;
+
+ // RelocateInstMap is a mapping from the base relocate instruction to the
+ // corresponding derived relocate instructions
+ DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
+ computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
+ if (RelocateInstMap.empty())
+ return false;
+
+ for (auto &Item : RelocateInstMap)
+ // Item.first is the RelocatedBase to offset against
+ // Item.second is the vector of Targets to replace
+ MadeChange |= simplifyRelocatesOffABase(Item.first, Item.second);
+ return MadeChange;
+}
+
+/// SinkCast - Sink the specified cast instruction into its user blocks
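+///
+/// Rough illustration (block names invented): a trunc defined in %entry but
+/// used only in %then and %else is re-created once in each of those blocks
+/// and the original trunc is erased, so no cross-block copy of its value
+/// needs to be created and coalesced.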
+static bool SinkCast(CastInst *CI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ /// InsertedCasts - Only insert a cast in each block once.
+ DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Figure out which BB this cast is used in. For PHI's this is the
+ // appropriate predecessor block.
+ BasicBlock *UserBB = User->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ UserBB = PN->getIncomingBlock(TheUse);
+ }
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // The first insertion point of a block containing an EH pad is after the
+ // pad. If the pad is the user, we cannot sink the cast past the pad.
+ if (User->isEHPad())
+ continue;
+
+ // If the block selected to receive the cast is an EH pad that does not
+ // allow non-PHI instructions before the terminator, we can't sink the
+ // cast.
+ if (UserBB->getTerminator()->isEHPad())
+ continue;
+
+ // If this user is in the same block as the cast, don't change the cast.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cast into this block, use it.
+ CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+ if (!InsertedCast) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
+ CI->getType(), "", &*InsertPt);
+ }
+
+ // Replace a use of the cast with a use of the new cast.
+ TheUse = InsertedCast;
+ MadeChange = true;
+ ++NumCastUses;
+ }
+
+ // If we removed all uses, nuke the cast.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// If the specified cast instruction is a noop copy (e.g. it's casting from
+/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
+/// reduce the number of virtual registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
+ const DataLayout &DL) {
+ // If this is a noop copy,
+ EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, CI->getType());
+
+ // This is an fp<->int conversion?
+ if (SrcVT.isInteger() != DstVT.isInteger())
+ return false;
+
+ // If this is an extension, it will be a zero or sign extension, which
+ // isn't a noop.
+ if (SrcVT.bitsLT(DstVT)) return false;
+
+ // If these values will be promoted, find out what they will be promoted
+ // to. This helps us consider truncates on PPC as noop copies when they
+ // are.
+ if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+ TargetLowering::TypePromoteInteger)
+ SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+ if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+ TargetLowering::TypePromoteInteger)
+ DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+ // If, after promotion, these are the same types, this is a noop copy.
+ if (SrcVT != DstVT)
+ return false;
+
+ return SinkCast(CI);
+}
+
+/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
+/// possible.
+///
+/// Return true if any changes were made.
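+///
+/// A minimal sketch of the rewrite (value names invented); the matched
+/// pattern is an add plus an unsigned-overflow compare of its result:
+///   %add = add i32 %a, %b
+///   %cmp = icmp ult i32 %add, %a
+/// which becomes:
+///   %uadd.overflow = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+///   %uadd = extractvalue { i32, i1 } %uadd.overflow, 0
+///   %overflow = extractvalue { i32, i1 } %uadd.overflow, 1
+/// with uses of %add and %cmp replaced by %uadd and %overflow respectively.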
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+ assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+ Module *M = CI->getModule();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
+
+/// Sink the given CmpInst into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced. This is a clear win except on
+/// targets with multiple condition code registers (PowerPC), where it might
+/// lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
+static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
+ BasicBlock *DefBB = CI->getParent();
+
+ // Avoid sinking soft-FP comparisons, since this can move them into a loop.
+ if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+ return false;
+
+ // Only insert a cmp in each block once.
+ DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ // Figure out which BB this cmp is used in.
+ BasicBlock *UserBB = User->getParent();
+
+ // If this user is in the same block as the cmp, don't change the cmp.
+ if (UserBB == DefBB) continue;
+
+ // If we have already inserted a cmp into this block, use it.
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+ if (!InsertedCmp) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedCmp =
+ CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
+ CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+ }
+
+ // Replace a use of the cmp with a use of the new cmp.
+ TheUse = InsertedCmp;
+ MadeChange = true;
+ ++NumCmpUses;
+ }
+
+ // If we removed all uses, nuke the cmp.
+ if (CI->use_empty()) {
+ CI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
+ if (SinkCmpExpression(CI, TLI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
+/// Check if the candidates could be combined with a shift instruction, which
+/// includes:
+/// 1. Truncate instruction
+/// 2. And instruction and the imm is a mask of the low bits:
+/// imm & (imm+1) == 0
+static bool isExtractBitsCandidateUse(Instruction *User) {
+ if (!isa<TruncInst>(User)) {
+ if (User->getOpcode() != Instruction::And ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return false;
+
+ const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
+
+ if ((Cimm & (Cimm + 1)).getBoolValue())
+ return false;
+ }
+ return true;
+}
+
+/// Sink both the shift and truncate instructions to the BB of the truncate's user.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+ DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+ const TargetLowering &TLI, const DataLayout &DL) {
+ BasicBlock *UserBB = User->getParent();
+ DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+ TruncInst *TruncI = dyn_cast<TruncInst>(User);
+ bool MadeChange = false;
+
+ for (Value::user_iterator TruncUI = TruncI->user_begin(),
+ TruncE = TruncI->user_end();
+ TruncUI != TruncE;) {
+
+ Use &TruncTheUse = TruncUI.getUse();
+ Instruction *TruncUser = cast<Instruction>(*TruncUI);
+ // Preincrement use iterator so we don't invalidate it.
+
+ ++TruncUI;
+
+ int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+ if (!ISDOpcode)
+ continue;
+
+ // If the use is actually a legal node, there will not be an
+ // implicit truncate.
+ // FIXME: always querying the result type is just an
+ // approximation; some nodes' legality is determined by the
+ // operand or other means. There's no good way to find out though.
+ if (TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
+ continue;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(TruncUser))
+ continue;
+
+ BasicBlock *TruncUserBB = TruncUser->getParent();
+
+ if (UserBB == TruncUserBB)
+ continue;
+
+ BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+ CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+ if (!InsertedShift && !InsertedTrunc) {
+ BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+ assert(InsertPt != TruncUserBB->end());
+ // Sink the shift
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+ else
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+
+ // Sink the trunc
+ BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+ TruncInsertPt++;
+ assert(TruncInsertPt != TruncUserBB->end());
+
+ InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+ TruncI->getType(), "", &*TruncInsertPt);
+
+ MadeChange = true;
+
+ TruncTheUse = InsertedTrunc;
+ }
+ }
+ return MadeChange;
+}
+
+/// Sink the shift *right* instruction into user blocks if the uses could
+/// potentially be combined with this shift instruction and generate BitExtract
+/// instruction. It will only be applied if the architecture supports BitExtract
+/// instruction. Here is an example:
+/// BB1:
+/// %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+/// %x.extract.shift.1 = lshr i64 %arg1, 32
+/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
+/// instruction.
+/// Return true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+ const TargetLowering &TLI,
+ const DataLayout &DL) {
+ BasicBlock *DefBB = ShiftI->getParent();
+
+ /// Only insert instructions in each block once.
+ DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+ bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ if (!isExtractBitsCandidateUse(User))
+ continue;
+
+ BasicBlock *UserBB = User->getParent();
+
+ if (UserBB == DefBB) {
+ // If the shift and truncate instructions are in the same BB, the use of
+ // the truncate (TruncUse) may still introduce another truncate if it is not
+ // legal. In this case, we would like to sink both the shift and truncate
+ // instructions to the BB of TruncUse.
+ // for example:
+ // BB1:
+ // i64 shift.result = lshr i64 opnd, imm
+ // trunc.result = trunc shift.result to i16
+ //
+ // BB2:
+ // ----> We will have an implicit truncate here if the architecture does
+ // not have i16 compare.
+ // cmp i16 trunc.result, opnd2
+ //
+ if (isa<TruncInst>(User) && shiftIsLegal
+ // If the type of the truncate is legal, no truncate will be
+ // introduced in other basic blocks.
+ &&
+ (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
+ MadeChange =
+ SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
+
+ continue;
+ }
+ // If we have already inserted a shift into this block, use it.
+ BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+ if (!InsertedShift) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+ else
+ InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
+ "", &*InsertPt);
+
+ MadeChange = true;
+ }
+
+ // Replace a use of the shift with a use of the new shift.
+ TheUse = InsertedShift;
+ }
+
+ // If we removed all uses, nuke the shift.
+ if (ShiftI->use_empty())
+ ShiftI->eraseFromParent();
+
+ return MadeChange;
+}
+
+// Translate a masked load intrinsic like
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, loading the elements one by one if
+// the appropriate mask bit is set.
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.load, label %else
+//
+//cond.load: ; preds = %0
+// %4 = getelementptr i32* %1, i32 0
+// %5 = load i32* %4
+// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// br label %else
+//
+//else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+// %7 = extractelement <16 x i1> %mask, i32 1
+// %8 = icmp eq i1 %7, true
+// br i1 %8, label %cond.load1, label %else2
+//
+//cond.load1: ; preds = %else
+// %9 = getelementptr i32* %1, i32 1
+// %10 = load i32* %9
+// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// br label %else2
+//
+//else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+// %12 = extractelement <16 x i1> %mask, i32 2
+// %13 = icmp eq i1 %12, true
+// br i1 %13, label %cond.load4, label %else5
+//
+static void scalarizeMaskedLoad(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ Type *EltTy = CI->getType()->getVectorElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask = isa<Constant>(Mask) &&
+ cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_load = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+ }
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// Translate a masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks that store the elements one by one if
+// the appropriate mask bit is set.
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %4 = extractelement <16 x i32> %val, i32 0
+// %5 = getelementptr i32* %1, i32 0
+// store i32 %4, i32* %5
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %6 = extractelement <16 x i1> %mask, i32 1
+// %7 = icmp eq i1 %6, true
+// br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %8 = extractelement <16 x i32> %val, i32 1
+// %9 = getelementptr i32* %1, i32 1
+// store i32 %8, i32* %9
+// br label %else2
+// . . .
+static void scalarizeMaskedStore(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+ assert(VecType && "Unexpected data type in masked store intrinsic");
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask = isa<Constant>(Mask) &&
+ cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_store = icmp eq i1 %mask_1, true
+ // br i1 %to_store, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+// Translate a masked gather intrinsic like
+// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks, loading the elements one by one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> %Mask, i32 0
+// % ToLoad0 = icmp eq i1 % Mask0, true
+// br i1 % ToLoad0, label %cond.load, label %else
+//
+// cond.load:
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// % Load0 = load i32, i32* % Ptr0, align 4
+// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// br label %else
+//
+// else:
+// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
+// % Mask1 = extractelement <16 x i1> %Mask, i32 1
+// % ToLoad1 = icmp eq i1 % Mask1, true
+// br i1 % ToLoad1, label %cond.load1, label %else2
+//
+// cond.load1:
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// % Load1 = load i32, i32* % Ptr1, align 4
+// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// br label %else2
+// . . .
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// ret <16 x i32> %Result
+static void scalarizeMaskedGather(CallInst *CI) {
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Shorten the way if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
+ "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load,
+ Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %ToLoad1 = icmp eq i1 %Mask1, true
+ // br i1 %ToLoad1, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask,
+ Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToLoad" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
+ "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+ }
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// Translate a masked scatter intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+// <16 x i1> %Mask)
+// to a chain of basic blocks that store the elements one by one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> % Mask, i32 0
+// % ToStore0 = icmp eq i1 % Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// % Elt0 = extractelement <16 x i32> %Src, i32 0
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* % Ptr0, align 4
+// br label %else
+//
+// else:
+// % Mask1 = extractelement <16 x i1> % Mask, i32 1
+// % ToStore1 = icmp eq i1 % Mask1, true
+// br i1 % ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// % Elt1 = extractelement <16 x i32> %Src, i32 1
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 % Elt1, i32* % Ptr1, align 4
+// br label %else2
+// . . .
+static void scalarizeMaskedScatter(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptrs = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ assert(isa<VectorType>(Src->getType()) &&
+ "Unexpected data type in masked scatter intrinsic");
+ assert(isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+ // Shorten the way if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
+ // % ToStore = icmp eq i1 % Mask1, true
+ // br i1 % ToStore, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask,
+ Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp =
+ Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToStore" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // % Elt1 = extractelement <16 x i32> %Src, i32 1
+ // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 % Elt1, i32* % Ptr1
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+/// If counting leading or trailing zeros is an expensive operation and a zero
+/// input is defined, add a check for zero to avoid calling the intrinsic.
+///
+/// We want to transform:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+///
+/// into:
+/// entry:
+/// %cmpz = icmp eq i64 %A, 0
+/// br i1 %cmpz, label %cond.end, label %cond.false
+/// cond.false:
+/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+/// br label %cond.end
+/// cond.end:
+/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+///
+/// If the transform is performed, return true and set ModifiedDT to true.
+static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ const TargetLowering *TLI,
+ const DataLayout *DL,
+ bool &ModifiedDT) {
+ if (!TLI || !DL)
+ return false;
+
+ // If a zero input is undefined, it doesn't make sense to despeculate that.
+ if (match(CountZeros->getOperand(1), m_One()))
+ return false;
+
+ // If it's cheap to speculate, there's nothing to do.
+ auto IntrinsicID = CountZeros->getIntrinsicID();
+ if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
+ (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
+ return false;
+
+ // Only handle legal scalar cases. Anything else requires too much work.
+ Type *Ty = CountZeros->getType();
+ unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
+ if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
+ return false;
+
+ // The intrinsic will be sunk behind a compare against zero and branch.
+ BasicBlock *StartBlock = CountZeros->getParent();
+ BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
+
+ // Create another block after the count zero intrinsic. A PHI will be added
+ // in this block to select the result of the intrinsic or the bit-width
+ // constant if the input to the intrinsic is zero.
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
+ BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
+
+ // Set up a builder to create a compare, conditional branch, and PHI.
+ IRBuilder<> Builder(CountZeros->getContext());
+ Builder.SetInsertPoint(StartBlock->getTerminator());
+ Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
+
+ // Replace the unconditional branch that was created by the first split with
+ // a compare against zero and a conditional branch.
+ Value *Zero = Constant::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
+ Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Create a PHI in the end block to select either the output of the intrinsic
+ // or the bit width of the operand.
+ Builder.SetInsertPoint(&EndBlock->front());
+ PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
+ CountZeros->replaceAllUsesWith(PN);
+ Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
+ PN->addIncoming(BitWidth, StartBlock);
+ PN->addIncoming(CountZeros, CallBlock);
+
+ // We are explicitly handling the zero case, so we can set the intrinsic's
+ // undefined zero argument to 'true'. This will also prevent reprocessing the
+ // intrinsic; we only despeculate when a zero input is defined.
+ CountZeros->setArgOperand(1, Builder.getTrue());
+ ModifiedDT = true;
+ return true;
+}
+
+bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
+ BasicBlock *BB = CI->getParent();
+
+ // Lower inline assembly if we can.
+ // If we found an inline asm expression, and if the target knows how to
+ // lower it to normal LLVM code, do so now.
+ if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+ if (TLI->ExpandInlineAsm(CI)) {
+ // Avoid invalidating the iterator.
+ CurInstIterator = BB->begin();
+ // Avoid processing instructions out of order, which could cause
+ // reuse before a value is defined.
+ SunkAddrs.clear();
+ return true;
+ }
+ // Sink address computing for memory operands into the block.
+ if (optimizeInlineAsmInst(CI))
+ return true;
+ }
+
+ // Align the pointer arguments to this call if the target thinks it's a good
+ // idea
+ unsigned MinSize, PrefAlign;
+ if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(DL->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()),
+ 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ if ((Offset2 & (PrefAlign-1)) != 0)
+ continue;
+ AllocaInst *AI;
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
+ DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // Global variables can only be aligned if they are defined in this
+ // object (i.e. they are uniquely initialized in this object), and
+ // over-aligning global variables that have an explicit section is
+ // forbidden.
+ GlobalVariable *GV;
+ if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
+ GV->getPointerAlignment(*DL) < PrefAlign &&
+ DL->getTypeAllocSize(GV->getValueType()) >=
+ MinSize + Offset2)
+ GV->setAlignment(PrefAlign);
+ }
+ // If this is a memcpy (or similar) then we may be able to improve the
+ // alignment
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *DL);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
+ // If we have a cold call site, try to sink addressing computation into the
+ // cold block. This interacts with our handling for loads and stores to
+ // ensure that we can fold all uses of a potential addressing computation
+ // into their uses. TODO: generalize this to work over profiling data
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ for (auto &Arg : CI->arg_operands()) {
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ unsigned AS = Arg->getType()->getPointerAddressSpace();
+ return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::objectsize: {
+ // Lower all uses of llvm.objectsize.*
+ uint64_t Size;
+ Type *ReturnTy = CI->getType();
+ Constant *RetVal = nullptr;
+ ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1));
+ ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min;
+ if (getObjectSize(II->getArgOperand(0),
+ Size, *DL, TLInfo, false, Mode)) {
+ RetVal = ConstantInt::get(ReturnTy, Size);
+ } else {
+ RetVal = ConstantInt::get(ReturnTy,
+ Mode == ObjSizeMode::Min ? 0 : -1ULL);
+ }
+ // Substituting this can cause recursive simplifications, which can
+ // invalidate our iterator. Use a WeakVH to hold onto it in case this
+ // happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakVH IterHandle(CurValue);
+
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ if (IterHandle != CurValue) {
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ return true;
+ }
+ case Intrinsic::masked_load: {
+ // Scalarize unsupported vector masked load
+ if (!TTI->isLegalMaskedLoad(CI->getType())) {
+ scalarizeMaskedLoad(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_store: {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
+ scalarizeMaskedStore(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_gather: {
+ if (!TTI->isLegalMaskedGather(CI->getType())) {
+ scalarizeMaskedGather(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_scatter: {
+ if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
+ scalarizeMaskedScatter(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr: {
+ ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+ if (!ExtVal || !ExtVal->hasOneUse() ||
+ ExtVal->getParent() == CI->getParent())
+ return false;
+ // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
+ ExtVal->moveBefore(CI);
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(ExtVal);
+ return true;
+ }
+ case Intrinsic::invariant_group_barrier:
+ II->replaceAllUsesWith(II->getArgOperand(0));
+ II->eraseFromParent();
+ return true;
+
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ // If counting zeros is expensive, try to avoid it.
+ return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ }
+
+ if (TLI) {
+ // Unknown address space.
+ // TODO: Target hook to pick which address space the intrinsic cares
+ // about?
+ unsigned AddrSpace = ~0u;
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
+ while (!PtrOps.empty())
+ if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ return true;
+ }
+ }
+
+ // From here on out we're working with named functions.
+ if (!CI->getCalledFunction()) return false;
+
+ // Lower all default uses of _chk calls. This is very similar
+ // to what InstCombineCalls does, but here we are only lowering calls
+ // to fortified library functions (e.g. __memcpy_chk) that pass the default
+ // "don't know" value as the object size. Anything else should be left alone.
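+ // For example, a "__memcpy_chk(dst, src, n, (size_t)-1)" call, where the -1
+ // says the object size is unknown, is simplified to a plain
+ // "memcpy(dst, src, n)" here.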
+ FortifiedLibCallSimplifier Simplifier(TLInfo, true);
+ if (Value *V = Simplifier.optimizeCall(CI)) {
+ CI->replaceAllUsesWith(V);
+ CI->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+/// Look for opportunities to duplicate return instructions to the predecessor
+/// to enable tail call optimizations. The case it is currently looking for is:
+/// @code
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// br label %return
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// br label %return
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// br label %return
+/// return:
+/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+/// ret i32 %retval
+/// @endcode
+///
+/// =>
+///
+/// @code
+/// bb0:
+/// %tmp0 = tail call i32 @f0()
+/// ret i32 %tmp0
+/// bb1:
+/// %tmp1 = tail call i32 @f1()
+/// ret i32 %tmp1
+/// bb2:
+/// %tmp2 = tail call i32 @f2()
+/// ret i32 %tmp2
+/// @endcode
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
+ if (!TLI)
+ return false;
+
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI)
+ return false;
+
+ PHINode *PN = nullptr;
+ BitCastInst *BCI = nullptr;
+ Value *V = RI->getReturnValue();
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
+
+ if (PN && PN->getParent() != BB)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ // See llvm::isInTailCallPosition().
+ const Function *F = BB->getParent();
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Make sure there are no instructions between the PHI and return, or that the
+ // return is the first instruction in the block.
+ if (PN) {
+ BasicBlock::iterator BI = BB->begin();
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI == BCI)
+ // Also skip over the bitcast.
+ ++BI;
+ if (&*BI != RI)
+ return false;
+ } else {
+ BasicBlock::iterator BI = BB->begin();
+ while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+ if (&*BI != RI)
+ return false;
+ }
+
+ /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+ /// call.
+ SmallVector<CallInst*, 4> TailCalls;
+ if (PN) {
+ for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+ CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Make sure the phi value is indeed produced by the tail call.
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ } else {
+ SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ if (!VisitedBBs.insert(*PI).second)
+ continue;
+
+ BasicBlock::InstListType &InstList = (*PI)->getInstList();
+ BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+ BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+ do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+ if (RI == RE)
+ continue;
+
+ CallInst *CI = dyn_cast<CallInst>(&*RI);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ TailCalls.push_back(CI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+ CallInst *CI = TailCalls[i];
+ CallSite CS(CI);
+
+ // Conservatively require the attributes of the call to match those of the
+ // return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+ removeAttribute(Attribute::NoAlias))
+ continue;
+
+ // Make sure the call instruction is followed by an unconditional branch to
+ // the return block.
+ BasicBlock *CallBB = CI->getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+ continue;
+
+ // Duplicate the return into CallBB.
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ ModifiedDT = Changed = true;
+ ++NumRetsDup;
+ }
+
+ // If we eliminated all predecessors of the block, delete the block now.
+ if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
+ BB->eraseFromParent();
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// This is an extended version of TargetLowering::AddrMode
+/// which holds actual Value*'s for register values.
+struct ExtAddrMode : public TargetLowering::AddrMode {
+ Value *BaseReg;
+ Value *ScaledReg;
+ ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
+ void print(raw_ostream &OS) const;
+ void dump() const;
+
+ bool operator==(const ExtAddrMode& O) const {
+ return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
+ (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
+ (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+ }
+};
+
+#ifndef NDEBUG
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+ AM.print(OS);
+ return OS;
+}
+#endif
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+ bool NeedPlus = false;
+ OS << "[";
+ if (BaseGV) {
+ OS << (NeedPlus ? " + " : "")
+ << "GV:";
+ BaseGV->printAsOperand(OS, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+
+ if (BaseOffs) {
+ OS << (NeedPlus ? " + " : "")
+ << BaseOffs;
+ NeedPlus = true;
+ }
+
+ if (BaseReg) {
+ OS << (NeedPlus ? " + " : "")
+ << "Base:";
+ BaseReg->printAsOperand(OS, /*PrintType=*/false);
+ NeedPlus = true;
+ }
+ if (Scale) {
+ OS << (NeedPlus ? " + " : "")
+ << Scale << "*";
+ ScaledReg->printAsOperand(OS, /*PrintType=*/false);
+ }
+
+ OS << ']';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+#endif
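+// A fully populated ExtAddrMode prints as, e.g. (operand names illustrative):
+//   [GV:@g + 16 + Base:%base + 4*%idx]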
+
+/// \brief This class provides transaction-based operations on the IR.
+/// Every change made through this class is recorded in the internal state and
+/// can be undone (rollback) until commit is called.
+class TypePromotionTransaction {
+
+ /// \brief This represents the common interface of the individual transaction.
+ /// Each class implements the logic for doing one specific modification on
+ /// the IR via the TypePromotionTransaction.
+ class TypePromotionAction {
+ protected:
+ /// The Instruction modified.
+ Instruction *Inst;
+
+ public:
+ /// \brief Constructor of the action.
+ /// The constructor performs the related action on the IR.
+ TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
+
+ virtual ~TypePromotionAction() {}
+
+ /// \brief Undo the modification done by this action.
+ /// When this method is called, the IR must be in the same state as it was
+ /// before this action was applied.
+ /// \pre Undoing the action works if and only if the IR is in the exact same
+ /// state as it was directly after this action was applied.
+ virtual void undo() = 0;
+
+ /// \brief Commit every change made by this action.
+ /// When the action's effects on the IR are to be kept, it is important to
+ /// call this function; otherwise hidden information may be kept forever.
+ virtual void commit() {
+ // Nothing to be done, this action is not doing anything.
+ }
+ };
+
+ /// \brief Utility to remember the position of an instruction.
+ class InsertionHandler {
+ /// Position of an instruction.
+ /// Either the instruction:
+ /// - is the first in its basic block: BB is used, or
+ /// - has a previous instruction: PrevInst is used.
+ union {
+ Instruction *PrevInst;
+ BasicBlock *BB;
+ } Point;
+ /// Remember whether or not the instruction had a previous instruction.
+ bool HasPrevInstruction;
+
+ public:
+ /// \brief Record the position of \p Inst.
+ InsertionHandler(Instruction *Inst) {
+ BasicBlock::iterator It = Inst->getIterator();
+ HasPrevInstruction = (It != (Inst->getParent()->begin()));
+ if (HasPrevInstruction)
+ Point.PrevInst = &*--It;
+ else
+ Point.BB = Inst->getParent();
+ }
+
+ /// \brief Insert \p Inst at the recorded position.
+ void insert(Instruction *Inst) {
+ if (HasPrevInstruction) {
+ if (Inst->getParent())
+ Inst->removeFromParent();
+ Inst->insertAfter(Point.PrevInst);
+ } else {
+ Instruction *Position = &*Point.BB->getFirstInsertionPt();
+ if (Inst->getParent())
+ Inst->moveBefore(Position);
+ else
+ Inst->insertBefore(Position);
+ }
+ }
+ };
+
+ /// \brief Move an instruction before another.
+ class InstructionMoveBefore : public TypePromotionAction {
+ /// Original position of the instruction.
+ InsertionHandler Position;
+
+ public:
+ /// \brief Move \p Inst before \p Before.
+ InstructionMoveBefore(Instruction *Inst, Instruction *Before)
+ : TypePromotionAction(Inst), Position(Inst) {
+ DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
+ Inst->moveBefore(Before);
+ }
+
+ /// \brief Move the instruction back to its original position.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
+ Position.insert(Inst);
+ }
+ };
+
+ /// \brief Set the operand of an instruction with a new value.
+ class OperandSetter : public TypePromotionAction {
+ /// Original operand of the instruction.
+ Value *Origin;
+ /// Index of the modified instruction.
+ unsigned Idx;
+
+ public:
+ /// \brief Set \p Idx operand of \p Inst with \p NewVal.
+ OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
+ : TypePromotionAction(Inst), Idx(Idx) {
+ DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
+ << "for:" << *Inst << "\n"
+ << "with:" << *NewVal << "\n");
+ Origin = Inst->getOperand(Idx);
+ Inst->setOperand(Idx, NewVal);
+ }
+
+ /// \brief Restore the original value of the instruction.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
+ << "for: " << *Inst << "\n"
+ << "with: " << *Origin << "\n");
+ Inst->setOperand(Idx, Origin);
+ }
+ };
+
+ /// \brief Hide the operands of an instruction.
+ /// Behave as if this instruction were not using any of its operands.
+ class OperandsHider : public TypePromotionAction {
+ /// The list of original operands.
+ SmallVector<Value *, 4> OriginalValues;
+
+ public:
+ /// \brief Remove \p Inst from the uses of the operands of \p Inst.
+ OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
+ DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
+ unsigned NumOpnds = Inst->getNumOperands();
+ OriginalValues.reserve(NumOpnds);
+ for (unsigned It = 0; It < NumOpnds; ++It) {
+ // Save the current operand.
+ Value *Val = Inst->getOperand(It);
+ OriginalValues.push_back(Val);
+ // Set a dummy one.
+ // We could use OperandSetter here, but that would imply an overhead
+ // that we are not willing to pay.
+ Inst->setOperand(It, UndefValue::get(Val->getType()));
+ }
+ }
+
+ /// \brief Restore the original list of uses.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
+ for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
+ Inst->setOperand(It, OriginalValues[It]);
+ }
+ };
+
+ /// \brief Build a truncate instruction.
+ class TruncBuilder : public TypePromotionAction {
+ Value *Val;
+ public:
+ /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// trunc Opnd to Ty.
+ TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
+ IRBuilder<> Builder(Opnd);
+ Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
+ }
+
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// \brief Remove the built instruction.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// \brief Build a sign extension instruction.
+ class SExtBuilder : public TypePromotionAction {
+ Value *Val;
+ public:
+ /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// sext Opnd to Ty.
+ SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+ : TypePromotionAction(InsertPt) {
+ IRBuilder<> Builder(InsertPt);
+ Val = Builder.CreateSExt(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
+ }
+
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// \brief Remove the built instruction.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// \brief Build a zero extension instruction.
+ class ZExtBuilder : public TypePromotionAction {
+ Value *Val;
+ public:
+ /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
+ /// result.
+ /// zext Opnd to Ty.
+ ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+ : TypePromotionAction(InsertPt) {
+ IRBuilder<> Builder(InsertPt);
+ Val = Builder.CreateZExt(Opnd, Ty, "promoted");
+ DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
+ }
+
+ /// \brief Get the built value.
+ Value *getBuiltValue() { return Val; }
+
+ /// \brief Remove the built instruction.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
+ if (Instruction *IVal = dyn_cast<Instruction>(Val))
+ IVal->eraseFromParent();
+ }
+ };
+
+ /// \brief Mutate an instruction to another type.
+ class TypeMutator : public TypePromotionAction {
+ /// Record the original type.
+ Type *OrigTy;
+
+ public:
+ /// \brief Mutate the type of \p Inst into \p NewTy.
+ TypeMutator(Instruction *Inst, Type *NewTy)
+ : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
+ DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
+ << "\n");
+ Inst->mutateType(NewTy);
+ }
+
+ /// \brief Mutate the instruction back to its original type.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
+ << "\n");
+ Inst->mutateType(OrigTy);
+ }
+ };
+
+ /// \brief Replace the uses of an instruction by another instruction.
+ class UsesReplacer : public TypePromotionAction {
+ /// Helper structure to keep track of the replaced uses.
+ struct InstructionAndIdx {
+ /// The instruction using the replaced instruction.
+ Instruction *Inst;
+ /// The operand index at which the replaced instruction is used by Inst.
+ unsigned Idx;
+ InstructionAndIdx(Instruction *Inst, unsigned Idx)
+ : Inst(Inst), Idx(Idx) {}
+ };
+
+ /// Keep track of the original uses (pair Instruction, Index).
+ SmallVector<InstructionAndIdx, 4> OriginalUses;
+ typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
+
+ public:
+ /// \brief Replace all the uses of \p Inst with \p New.
+ UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
+ DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
+ << "\n");
+ // Record the original uses.
+ for (Use &U : Inst->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
+ }
+ // Now, we can replace the uses.
+ Inst->replaceAllUsesWith(New);
+ }
+
+ /// \brief Reassign the original uses of Inst back to Inst.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
+ for (use_iterator UseIt = OriginalUses.begin(),
+ EndIt = OriginalUses.end();
+ UseIt != EndIt; ++UseIt) {
+ UseIt->Inst->setOperand(UseIt->Idx, Inst);
+ }
+ }
+ };
+
+ /// \brief Remove an instruction from the IR.
+ class InstructionRemover : public TypePromotionAction {
+ /// Original position of the instruction.
+ InsertionHandler Inserter;
+ /// Helper structure to hide all the links to the instruction. In other
+ /// words, this helps to pretend the instruction was removed.
+ OperandsHider Hider;
+ /// Keep track of the uses replaced, if any.
+ UsesReplacer *Replacer;
+
+ public:
+ /// \brief Remove all references to \p Inst and optionally replace all its
+ /// uses with \p New.
+ /// \pre If !Inst->use_empty(), then New != nullptr
+ InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
+ Replacer(nullptr) {
+ if (New)
+ Replacer = new UsesReplacer(Inst, New);
+ DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ Inst->removeFromParent();
+ }
+
+ ~InstructionRemover() override { delete Replacer; }
+
+ /// \brief Really remove the instruction.
+ void commit() override { delete Inst; }
+
+ /// \brief Resurrect the instruction and reassign it to the proper uses if
+ /// a new value was provided when building this action.
+ void undo() override {
+ DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
+ Inserter.insert(Inst);
+ if (Replacer)
+ Replacer->undo();
+ Hider.undo();
+ }
+ };
+
+public:
+ /// Restoration point.
+ /// The restoration point is a pointer to an action instead of an iterator
+ /// because the iterator may be invalidated but not the pointer.
+ typedef const TypePromotionAction *ConstRestorationPt;
+ /// Commit every change made in this transaction.
+ void commit();
+ /// Undo all the changes made after the given point.
+ void rollback(ConstRestorationPt Point);
+ /// Get the current restoration point.
+ ConstRestorationPt getRestorationPoint() const;
+
+ /// \name API for IR modification with state keeping to support rollback.
+ /// @{
+ /// Same as Instruction::setOperand.
+ void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
+ /// Same as Instruction::eraseFromParent.
+ void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
+ /// Same as Value::replaceAllUsesWith.
+ void replaceAllUsesWith(Instruction *Inst, Value *New);
+ /// Same as Value::mutateType.
+ void mutateType(Instruction *Inst, Type *NewTy);
+ /// Same as IRBuilder::createTrunc.
+ Value *createTrunc(Instruction *Opnd, Type *Ty);
+ /// Same as IRBuilder::createSExt.
+ Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ /// Same as IRBuilder::createZExt.
+ Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
+ /// Same as Instruction::moveBefore.
+ void moveBefore(Instruction *Inst, Instruction *Before);
+ /// @}
+
+private:
+ /// The ordered list of actions made so far.
+ SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
+ typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+};
+
+void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
+ Value *NewVal) {
+ Actions.push_back(
+ make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
+}
+
+void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
+ Value *NewVal) {
+ Actions.push_back(
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+}
+
+void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
+ Value *New) {
+ Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
+}
+
+void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
+ Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
+}
+
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
+ Type *Ty) {
+ std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+Value *TypePromotionTransaction::createSExt(Instruction *Inst,
+ Value *Opnd, Type *Ty) {
+ std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+Value *TypePromotionTransaction::createZExt(Instruction *Inst,
+ Value *Opnd, Type *Ty) {
+ std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
+ Value *Val = Ptr->getBuiltValue();
+ Actions.push_back(std::move(Ptr));
+ return Val;
+}
+
+void TypePromotionTransaction::moveBefore(Instruction *Inst,
+ Instruction *Before) {
+ Actions.push_back(
+ make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
+}
+
+TypePromotionTransaction::ConstRestorationPt
+TypePromotionTransaction::getRestorationPoint() const {
+ return !Actions.empty() ? Actions.back().get() : nullptr;
+}
+
+void TypePromotionTransaction::commit() {
+ for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
+ ++It)
+ (*It)->commit();
+ Actions.clear();
+}
+
+void TypePromotionTransaction::rollback(
+ TypePromotionTransaction::ConstRestorationPt Point) {
+ while (!Actions.empty() && Point != Actions.back().get()) {
+ std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
+ Curr->undo();
+ }
+}
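+// Typical use of the transaction machinery above (schematic; the
+// profitability check is illustrative):
+//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+//       TPT.getRestorationPoint();
+//   TPT.setOperand(Inst, 0, NewVal);   // recorded action
+//   if (!StillProfitable)
+//     TPT.rollback(LastKnownGood);     // undo everything after the point
+//   else
+//     TPT.commit();                    // make the recorded changes permanent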
+
+/// \brief A helper class for matching addressing modes.
+///
+/// This encapsulates the logic for matching the target-legal addressing modes.
+class AddressingModeMatcher {
+ SmallVectorImpl<Instruction*> &AddrModeInsts;
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ const DataLayout &DL;
+
+ /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
+ /// the memory instruction that we're computing this address for.
+ Type *AccessTy;
+ unsigned AddrSpace;
+ Instruction *MemoryInst;
+
+ /// This is the addressing mode that we're building up. This is
+ /// part of the return value of this addressing mode matching stuff.
+ ExtAddrMode &AddrMode;
+
+ /// The instructions inserted by other CodeGenPrepare optimizations.
+ const SetOfInstrs &InsertedInsts;
+ /// A map from the instructions to their type before promotion.
+ InstrToOrigTy &PromotedInsts;
+ /// The ongoing transaction where every action should be registered.
+ TypePromotionTransaction &TPT;
+
+ /// This is set to true when we should not do profitability checks.
+ /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
+ bool IgnoreProfitability;
+
+ AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
+ const TargetMachine &TM, Type *AT, unsigned AS,
+ Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT)
+ : AddrModeInsts(AMI), TM(TM),
+ TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
+ ->getTargetLowering()),
+ DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
+ MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
+ PromotedInsts(PromotedInsts), TPT(TPT) {
+ IgnoreProfitability = false;
+ }
+public:
+
+ /// Find the maximal addressing mode that a load/store of V can fold,
+ /// given an access type of AccessTy. This returns a list of involved
+ /// instructions in AddrModeInsts.
+ /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
+ /// optimizations.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p TPT is the ongoing transaction where every action should be registered.
+ static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
+ Instruction *MemoryInst,
+ SmallVectorImpl<Instruction*> &AddrModeInsts,
+ const TargetMachine &TM,
+ const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT) {
+ ExtAddrMode Result;
+
+ bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
+ MemoryInst, Result, InsertedInsts,
+ PromotedInsts, TPT).matchAddr(V, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+ return Result;
+ }
+private:
+ bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+ bool matchAddr(Value *V, unsigned Depth);
+ bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+ bool *MovedAway = nullptr);
+ bool isProfitableToFoldIntoAddressingMode(Instruction *I,
+ ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter);
+ bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+ bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
+ Value *PromotedOperand) const;
+};
+
+/// Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
+ unsigned Depth) {
+ // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+ // mode. Just process that directly.
+ if (Scale == 1)
+ return matchAddr(ScaleReg, Depth);
+
+ // If the scale is 0, it takes nothing to add this.
+ if (Scale == 0)
+ return true;
+
+ // If we already have a scale of this value, we can add to it, otherwise, we
+ // need an available scale field.
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+ return false;
+
+ ExtAddrMode TestAddrMode = AddrMode;
+
+ // Add scale to turn X*4+X*3 -> X*7. This could also do things like
+ // [A+B + A*7] -> [B+A*8].
+ TestAddrMode.Scale += Scale;
+ TestAddrMode.ScaledReg = ScaleReg;
+
+ // If the new address isn't legal, bail out.
+ if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
+ return false;
+
+ // It was legal, so commit it.
+ AddrMode = TestAddrMode;
+
+ // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
+ // to see if ScaleReg is actually X+C. If so, we can turn this into adding
+ // X*Scale + C*Scale to addr mode.
+ ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.ScaledReg = AddLHS;
+ TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+ // If this addressing mode is legal, commit it and remember that we folded
+ // this instruction.
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
+ AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ }
+
+ // Otherwise, not (x+c)*scale, just return what we have.
+ return true;
+}
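+// For example (names illustrative), matching "%idx = add i64 %i, 3" as the
+// scaled register with Scale == 4 sets ScaledReg = %i, Scale = 4, and adds
+// 3 * 4 = 12 to BaseOffs, provided the target accepts the resulting mode.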
+
+/// This is a little filter, which returns true if an addressing computation
+/// involving I might be folded into a load/store accessing it.
+/// This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ // Don't touch identity bitcasts.
+ if (I->getType() == I->getOperand(0)->getType())
+ return false;
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return true;
+ case Instruction::IntToPtr:
+ // We know the input is intptr_t, so this is foldable.
+ return true;
+ case Instruction::Add:
+ return true;
+ case Instruction::Mul:
+ case Instruction::Shl:
+ // Can only handle X*C and X << C.
+ return isa<ConstantInt>(I->getOperand(1));
+ case Instruction::GetElementPtr:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// \note \p Val is assumed to be the product of some type promotion.
+/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
+/// to be legal, as the non-promoted value would have had the same state.
+static bool isPromotedInstructionLegal(const TargetLowering &TLI,
+ const DataLayout &DL, Value *Val) {
+ Instruction *PromotedInst = dyn_cast<Instruction>(Val);
+ if (!PromotedInst)
+ return false;
+ int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+ // If the ISDOpcode is undefined, it was undefined before the promotion.
+ if (!ISDOpcode)
+ return true;
+ // Otherwise, check if the promoted instruction is legal or not.
+ return TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
+}
+
+/// \brief Helper class to perform type promotion.
+class TypePromotionHelper {
+ /// \brief Utility function to check whether or not a sign or zero extension
+ /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
+ /// either using the operands of \p Inst or promoting \p Inst.
+ /// The type of the extension is defined by \p IsSExt.
+ /// In other words, check if:
+ /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
+ /// #1 Promotion applies:
+ /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
+ /// #2 Operand reuses:
+ /// ext opnd1 to ConsideredExtType.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts, bool IsSExt);
+
+ /// \brief Utility function to determine if \p OpIdx should be promoted when
+ /// promoting \p Inst.
+ static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
+ return !(isa<SelectInst>(Inst) && OpIdx == 0);
+ }
+
+ /// \brief Utility function to promote the operand of \p Ext when this
+ /// operand is a promotable trunc or sext or zext.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
+ /// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
+ /// Should never be called directly.
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForTruncAndAnyExt(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
+
+ /// \brief Utility function to promote the operand of \p Ext when this
+ /// operand is promotable and is not a supported trunc or sext.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
+ /// created to promote the operand of Ext.
+ /// Newly added extensions are inserted in \p Exts.
+ /// Newly added truncates are inserted in \p Truncs.
+ /// Should never be called directly.
+ /// \return The promoted value which is used instead of Ext.
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
+
+ /// \see promoteOperandForOther.
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
+ }
+
+ /// \see promoteOperandForOther.
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
+ }
+
+public:
+ /// Type for the utility function that promotes the operand of Ext.
+ typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
+ /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
+ /// action to promote the operand of \p Ext instead of using Ext.
+ /// \return NULL if no promotable action is possible with the current
+ /// sign extension.
+ /// \p InsertedInsts keeps track of all the instructions inserted by the
+ /// other CodeGenPrepare optimizations. This information is important
+ /// because we do not want to promote these instructions as CodeGenPrepare
+ /// will reinsert them later, thus creating an infinite create/remove loop.
+ /// \p PromotedInsts maps the instructions to their type before promotion.
+ static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
+ const TargetLowering &TLI,
+ const InstrToOrigTy &PromotedInsts);
+};
+
+bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
+ Type *ConsideredExtType,
+ const InstrToOrigTy &PromotedInsts,
+ bool IsSExt) {
+ // The promotion helper does not know how to deal with vector types yet.
+ // To be able to fix that, we would need to fix the places where we
+ // statically extend, e.g., constants and such.
+ if (Inst->getType()->isVectorTy())
+ return false;
+
+ // We can always get through zext.
+ if (isa<ZExtInst>(Inst))
+ return true;
+
+ // sext(sext) is ok too.
+ if (IsSExt && isa<SExtInst>(Inst))
+ return true;
+
+ // We can get through binary operator, if it is legal. In other words, the
+ // binary operator must have a nuw or nsw flag.
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+ if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
+ ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+ (IsSExt && BinOp->hasNoSignedWrap())))
+ return true;
+
+ // Check if we can do the following simplification.
+ // ext(trunc(opnd)) --> ext(opnd)
+ if (!isa<TruncInst>(Inst))
+ return false;
+
+ Value *OpndVal = Inst->getOperand(0);
+ // Check if we can use this operand in the extension.
+ // If the type is larger than the result type of the extension, we cannot.
+ if (!OpndVal->getType()->isIntegerTy() ||
+ OpndVal->getType()->getIntegerBitWidth() >
+ ConsideredExtType->getIntegerBitWidth())
+ return false;
+
+ // If the operand of the truncate is not an instruction, we will not have
+ // any information on the dropped bits.
+ // (Actually we could for constant but it is not worth the extra logic).
+ Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
+ if (!Opnd)
+ return false;
+
+ // Check that the original (pre-extension) type is narrow enough, i.e., that
+ // the trunc just drops extended bits of the same kind as the extension.
+ // #1 get the type of the operand and check the kind of the extended bits.
+ const Type *OpndType;
+ InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
+ if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
+ OpndType = It->second.getPointer();
+ else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
+ OpndType = Opnd->getOperand(0)->getType();
+ else
+ return false;
+
+ // #2 check that the truncate just drops extended bits.
+ return Inst->getType()->getIntegerBitWidth() >=
+ OpndType->getIntegerBitWidth();
+}
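+// For example (schematic IR), with
+//   %w = sext i16 %v to i64
+//   %t = trunc i64 %w to i32
+//   %e = sext i32 %t to i64
+// the truncate only drops bits produced by the inner sext, so the outer sext
+// can get through it and end up using %w directly.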
+
+TypePromotionHelper::Action TypePromotionHelper::getAction(
+ Instruction *Ext, const SetOfInstrs &InsertedInsts,
+ const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
+ assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
+ "Unexpected instruction type");
+ Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
+ Type *ExtTy = Ext->getType();
+ bool IsSExt = isa<SExtInst>(Ext);
+ // If the operand of the extension is not an instruction, we cannot
+ // get through.
+ // If it is, check whether we can get through.
+ if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
+ return nullptr;
+
+ // Do not promote if the operand has been added by codegenprepare.
+ // Otherwise, it means we are undoing an optimization that is likely to be
+ // redone, thus causing a potential infinite loop.
+ if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
+ return nullptr;
+
+ // SExt or Trunc instructions.
+ // Return the related handler.
+ if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
+ isa<ZExtInst>(ExtOpnd))
+ return promoteOperandForTruncAndAnyExt;
+
+ // Regular instruction.
+ // Abort early if we will have to insert non-free instructions.
+ if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
+ return nullptr;
+ return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
+}
+
+Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
+ llvm::Instruction *SExt, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ // By construction, the operand of SExt is an instruction. Otherwise we cannot
+ // get through it and this method should not be called.
+ Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+ Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
+ if (isa<ZExtInst>(SExtOpnd)) {
+ // Replace s|zext(zext(opnd))
+ // => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
+ Value *ZExt =
+ TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
+ TPT.replaceAllUsesWith(SExt, ZExt);
+ TPT.eraseInstruction(SExt);
+ ExtVal = ZExt;
+ } else {
+ // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
+ // => z|sext(opnd).
+ TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+ }
+ CreatedInstsCost = 0;
+
+ // Remove dead code.
+ if (SExtOpnd->use_empty())
+ TPT.eraseInstruction(SExtOpnd);
+
+ // Check if the extension is still needed.
+ Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
+ if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
+ return ExtVal;
+ }
+
+ // At this point we have: ext ty opnd to ty.
+ // Reassign the uses of ExtInst to the opnd and remove ExtInst.
+ Value *NextVal = ExtInst->getOperand(0);
+ TPT.eraseInstruction(ExtInst, NextVal);
+ return NextVal;
+}
+
+Value *TypePromotionHelper::promoteOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
+ // By construction, the operand of Ext is an instruction. Otherwise we cannot
+ // get through it and this method should not be called.
+ Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
+ CreatedInstsCost = 0;
+ if (!ExtOpnd->hasOneUse()) {
+ // ExtOpnd will be promoted.
+ // All its uses, but Ext, will need to use a truncated value of the
+ // promoted version.
+ // Create the truncate now.
+ Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
+ if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
+ ITrunc->removeFromParent();
+ // Insert it just after the definition.
+ ITrunc->insertAfter(ExtOpnd);
+ if (Truncs)
+ Truncs->push_back(ITrunc);
+ }
+
+ TPT.replaceAllUsesWith(ExtOpnd, Trunc);
+ // Restore the operand of Ext (which has been replaced by the previous call
+ // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
+ TPT.setOperand(Ext, 0, ExtOpnd);
+ }
+
+ // Get through the Instruction:
+ // 1. Update its type.
+ // 2. Replace the uses of Ext by Inst.
+ // 3. Extend each operand that needs to be extended.
+
+ // Remember the original type of the instruction before promotion.
+ // This is useful to know that the high bits are sign extended bits.
+ PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
+ ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
+ // Step #1.
+ TPT.mutateType(ExtOpnd, Ext->getType());
+ // Step #2.
+ TPT.replaceAllUsesWith(Ext, ExtOpnd);
+ // Step #3.
+ Instruction *ExtForOpnd = Ext;
+
+ DEBUG(dbgs() << "Propagate Ext to operands\n");
+ for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+ ++OpIdx) {
+ DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+ if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
+ !shouldExtOperand(ExtOpnd, OpIdx)) {
+ DEBUG(dbgs() << "No need to propagate\n");
+ continue;
+ }
+ // Check if we can statically extend the operand.
+ Value *Opnd = ExtOpnd->getOperand(OpIdx);
+ if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
+ DEBUG(dbgs() << "Statically extend\n");
+ unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
+ APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
+ : Cst->getValue().zext(BitWidth);
+ TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
+ continue;
+ }
+ // UndefValues are typed, so we have to statically extend them.
+ if (isa<UndefValue>(Opnd)) {
+ DEBUG(dbgs() << "Statically extend\n");
+ TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
+ continue;
+ }
+
+ // Otherwise we have to explicitly extend the operand.
+ // Check whether Ext has already been reused to extend an operand.
+ if (!ExtForOpnd) {
+ // If so, create a new extension.
+ DEBUG(dbgs() << "More operands to ext\n");
+ Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
+ : TPT.createZExt(Ext, Opnd, Ext->getType());
+ if (!isa<Instruction>(ValForExtOpnd)) {
+ TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
+ continue;
+ }
+ ExtForOpnd = cast<Instruction>(ValForExtOpnd);
+ }
+ if (Exts)
+ Exts->push_back(ExtForOpnd);
+ TPT.setOperand(ExtForOpnd, 0, Opnd);
+
+ // Move the sign extension before the insertion point.
+ TPT.moveBefore(ExtForOpnd, ExtOpnd);
+ TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
+ // If more sext are required, new instructions will have to be created.
+ ExtForOpnd = nullptr;
+ }
+ if (ExtForOpnd == Ext) {
+ DEBUG(dbgs() << "Extension is useless now\n");
+ TPT.eraseInstruction(Ext);
+ }
+ return ExtOpnd;
+}
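+// Schematic example of the promotion performed above (sext of an "add nsw"
+// from i32 to i64; value names are illustrative):
+//   %a = add nsw i32 %x, 1
+//   %e = sext i32 %a to i64
+// becomes, roughly:
+//   %px = sext i32 %x to i64
+//   %pa = add nsw i64 %px, 1
+// with the uses of %e rewired to %pa; if the original add had other users, a
+// trunc of %pa back to i32 is inserted for them.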
+
+/// Check whether or not promoting an instruction to a wider type is profitable.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been matched into the
+/// addressing mode thanks to the promotion.
+/// \p PromotedOperand is the value that has been promoted.
+/// \return True if the promotion is profitable, false otherwise.
+bool AddressingModeMatcher::isPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
+ // This is not profitable.
+ if (NewCost > OldCost)
+ return false;
+ if (NewCost < OldCost)
+ return true;
+ // The promotion is neutral but it may help folding the sign extension in
+ // loads for instance.
+ // Check that we did not create an illegal instruction.
+ return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
+}
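+// For instance, if the promotion created one non-free extension (NewCost = 1)
+// while the original extension cost 1 and one extra instruction was folded
+// into the addressing mode (OldCost = 1 + 1 = 2), the promotion is profitable.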
+
+/// Given an instruction or constant expr, see if we can fold the operation
+/// into the addressing mode. If so, update the addressing mode and return
+/// true, otherwise return false without modifying AddrMode.
+/// If \p MovedAway is not NULL, it records whether or not \p AddrInst has to
+/// be folded into the addressing mode on success.
+/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
+/// mode because it has been moved away.
+/// Thus \p AddrInst must not be added to the matched instructions.
+/// This can happen when \p AddrInst is a sext, since it may be moved away.
+/// Therefore, \p AddrInst may not be valid when \p MovedAway is true and it
+/// must not be referenced anymore.
+bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
+ unsigned Depth,
+ bool *MovedAway) {
+ // Avoid exponential behavior on extremely deep expression trees.
+ if (Depth >= 5) return false;
+
+ // By default, all matched instructions stay in place.
+ if (MovedAway)
+ *MovedAway = false;
+
+ switch (Opcode) {
+ case Instruction::PtrToInt:
+ // PtrToInt is always a noop, as we know that the int type is pointer sized.
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ case Instruction::IntToPtr: {
+ auto AS = AddrInst->getType()->getPointerAddressSpace();
+ auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
+ // This inttoptr is a no-op if the integer type is pointer sized.
+ if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ }
+ case Instruction::BitCast:
+ // BitCast is always a noop, and we can handle it as long as it is
+ // int->int or pointer->pointer (we don't want int<->fp or something).
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+ // Don't touch identity bitcasts. These were probably put here by LSR,
+ // and we don't want to mess around with them. Assume it knows what it
+ // is doing.
+ AddrInst->getOperand(0)->getType() != AddrInst->getType())
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ case Instruction::AddrSpaceCast: {
+ unsigned SrcAS
+ = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
+ unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
+ if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ return matchAddr(AddrInst->getOperand(0), Depth);
+ return false;
+ }
+ case Instruction::Add: {
+ // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+ // Start a transaction at this point.
+ // The LHS may match but not the RHS.
+ // Therefore, we need a higher level restoration point to undo partially
+ // matched operation.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+
+ if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
+ matchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+
+ // Restore the old addr mode info.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+
+ // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
+ if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
+ matchAddr(AddrInst->getOperand(1), Depth+1))
+ return true;
+
+ // Otherwise we definitely can't merge the ADD in.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+ break;
+ }
+ //case Instruction::Or:
+ // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+ //break;
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ // Can only handle X*C and X << C.
+ ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+ if (!RHS)
+ return false;
+ int64_t Scale = RHS->getSExtValue();
+ if (Opcode == Instruction::Shl)
+ Scale = 1LL << Scale;
+
+ return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+ }
+ case Instruction::GetElementPtr: {
+ // Scan the GEP. We check whether it contains constant offsets and at most
+ // one variable offset.
+ int VariableOperand = -1;
+ unsigned VariableScale = 0;
+
+ int64_t ConstantOffset = 0;
+ gep_type_iterator GTI = gep_type_begin(AddrInst);
+ for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Idx =
+ cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+ ConstantOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+ ConstantOffset += CI->getSExtValue()*TypeSize;
+ } else if (TypeSize) { // Scales of zero don't do anything.
+ // We only allow one variable index at the moment.
+ if (VariableOperand != -1)
+ return false;
+
+ // Remember the variable index.
+ VariableOperand = i;
+ VariableScale = TypeSize;
+ }
+ }
+ }
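+ // For example (names illustrative), for
+ //   getelementptr %struct.S, %struct.S* %p, i64 %i, i32 1
+ // the constant i32 index contributes the byte offset of field 1 to
+ // ConstantOffset, while %i is the single variable index with a scale of
+ // the allocation size of %struct.S.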
+
+ // A common case is for the GEP to only do a constant offset. In this case,
+ // just add it to the disp field and check validity.
+ if (VariableOperand == -1) {
+ AddrMode.BaseOffs += ConstantOffset;
+ if (ConstantOffset == 0 ||
+ TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
+ // Check to see if we can fold the base pointer in too.
+ if (matchAddr(AddrInst->getOperand(0), Depth+1))
+ return true;
+ }
+ AddrMode.BaseOffs -= ConstantOffset;
+ return false;
+ }
+
+ // Save the valid addressing mode in case we can't match.
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // See if the scale and offset amount is valid for this target.
+ AddrMode.BaseOffs += ConstantOffset;
+
+ // Match the base operand of the GEP.
+ if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
+ // If it couldn't be matched, just stuff the value in a register.
+ if (AddrMode.HasBaseReg) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ }
+
+ // Match the remaining variable portion of the GEP.
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+ Depth)) {
+ // If it couldn't be matched, try stuffing the base into a register
+ // instead of matching it, and retrying the match of the scale.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ if (AddrMode.HasBaseReg)
+ return false;
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = AddrInst->getOperand(0);
+ AddrMode.BaseOffs += ConstantOffset;
+ if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
+ VariableScale, Depth)) {
+ // If even that didn't work, bail.
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ Instruction *Ext = dyn_cast<Instruction>(AddrInst);
+ if (!Ext)
+ return false;
+
+ // Try to move this ext out of the way of the addressing mode.
+ // Ask for a method for doing so.
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
+ if (!TPH)
+ return false;
+
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
+ Value *PromotedOperand =
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
+ // SExt has been moved away.
+ // Thus either it will be rematched later in the recursive calls or it is
+ // gone. Anyway, we must not fold it into the addressing mode at this point.
+ // E.g.,
+ // op = add opnd, 1
+ // idx = ext op
+ // addr = gep base, idx
+ // is now:
+ // promotedOpnd = ext opnd <- no match here
+ // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
+ // addr = gep base, op <- match
+ if (MovedAway)
+ *MovedAway = true;
+
+ assert(PromotedOperand &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ if (!matchAddr(PromotedOperand, Depth) ||
+ // The total of the new cost is equal to the cost of the created
+ // instructions.
+ // The total of the old cost is equal to the cost of the extension plus
+ // what we have saved in the addressing mode.
+ !isPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
+ PromotedOperand)) {
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+/// If we can, try to add the value of 'Addr' into the current addressing mode.
+/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
+/// unmodified. This assumes that Addr is either a pointer type or intptr_t
+/// for the target.
+///
+bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
+ // Start a transaction at this point that we will rollback if the matching
+ // fails.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+ // Fold in immediates if legal for the target.
+ AddrMode.BaseOffs += CI->getSExtValue();
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.BaseOffs -= CI->getSExtValue();
+ } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+ // If this is a global variable, try to fold it into the addressing mode.
+ if (!AddrMode.BaseGV) {
+ AddrMode.BaseGV = GV;
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.BaseGV = nullptr;
+ }
+ } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+ ExtAddrMode BackupAddrMode = AddrMode;
+ unsigned OldSize = AddrModeInsts.size();
+
+ // Check to see if it is possible to fold this operation.
+ bool MovedAway = false;
+ if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+ // This instruction may have been moved away. If so, there is nothing
+ // to check here.
+ if (MovedAway)
+ return true;
+ // Okay, it's possible to fold this. Check to see if it is actually
+ // *profitable* to do so. We use a simple cost model to avoid increasing
+ // register pressure too much.
+ if (I->hasOneUse() ||
+ isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+ AddrModeInsts.push_back(I);
+ return true;
+ }
+
+ // It isn't profitable to do this, roll back.
+ //cerr << "NOT FOLDING: " << *I;
+ AddrMode = BackupAddrMode;
+ AddrModeInsts.resize(OldSize);
+ TPT.rollback(LastKnownGood);
+ }
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+ if (matchOperationAddr(CE, CE->getOpcode(), Depth))
+ return true;
+ TPT.rollback(LastKnownGood);
+ } else if (isa<ConstantPointerNull>(Addr)) {
+ // Null pointer gets folded without affecting the addressing mode.
+ return true;
+ }
+
+ // Worst case, the target should support [reg] addressing modes. :)
+ if (!AddrMode.HasBaseReg) {
+ AddrMode.HasBaseReg = true;
+ AddrMode.BaseReg = Addr;
+ // Still check for legality in case the target supports [imm] but not [i+r].
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.HasBaseReg = false;
+ AddrMode.BaseReg = nullptr;
+ }
+
+ // If the base register is already taken, see if we can do [r+r].
+ if (AddrMode.Scale == 0) {
+ AddrMode.Scale = 1;
+ AddrMode.ScaledReg = Addr;
+ if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+ return true;
+ AddrMode.Scale = 0;
+ AddrMode.ScaledReg = nullptr;
+ }
+ // Couldn't match.
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+/// Check to see if all uses of OpVal by the specified inline asm call are due
+/// to memory operands. If so, return true, otherwise return false.
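+/// For example (illustrative), an operand bound to an indirect memory
+/// constraint (e.g. "=*m") counts as a memory operand, whereas one bound to a
+/// register constraint like "r" does not, so its address computation cannot
+/// be folded.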
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+ const TargetMachine &TM) {
+ const Function *F = CI->getParent()->getParent();
+ const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ ImmutableCallSite(CI));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ // If this asm operand is our Value*, and if it isn't an indirect memory
+ // operand, we can't fold it!
+ if (OpInfo.CallOperandVal == OpVal &&
+ (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+ !OpInfo.isIndirect))
+ return false;
+ }
+
+ return true;
+}
+
+/// Recursively walk all the uses of I until we find a memory use.
+/// If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
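+/// For example (illustrative), a use as the pointer operand of a load or
+/// store is a memory use, while a use as the value operand of a store is not
+/// foldable and stops the walk.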
+static bool FindAllMemoryUses(
+ Instruction *I,
+ SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
+ // If we already considered this instruction, we're done.
+ if (!ConsideredInsts.insert(I).second)
+ return false;
+
+ // If this is an obviously unfoldable instruction, bail out.
+ if (!MightBeFoldableInst(I))
+ return true;
+
+ const bool OptSize = I->getFunction()->optForSize();
+
+ // Loop over all the uses, recursively processing them.
+ for (Use &U : I->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
+ MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+ continue;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
+ continue;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+ // If this is a cold call, we can sink the addressing calculation into
+ // the cold path. See optimizeCallInst.
+ if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ continue;
+
+ InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+ if (!IA) return true;
+
+ // If this is a memory operand, we're cool, otherwise bail out.
+ if (!IsOperandAMemoryOperand(CI, IA, I, TM))
+ return true;
+ continue;
+ }
+
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
+ return true;
+ }
+
+ return false;
+}
+
+/// Return true if Val is already known to be live at the use site that we're
+/// folding it into. If so, there is no cost to include it in the addressing
+/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
+/// instruction already.
+bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+ Value *KnownLive2) {
+ // If Val is either of the known-live values, we know it is live!
+ if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
+ return true;
+
+ // All values other than instructions and arguments (e.g. constants) are live.
+ if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+ // If Val is a constant-sized alloca in the entry block, it is live; this is
+ // true because it is just a reference to the stack/frame pointer, which is
+ // live for the whole function.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+ if (AI->isStaticAlloca())
+ return true;
+
+ // Check to see if this value is already used in the memory instruction's
+ // block. If so, it's already live into the block at the very least, so we
+ // can reasonably fold it.
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
+}
+
+/// It is possible for the addressing mode of the machine to fold the specified
+/// instruction into a load or store that ultimately uses it.
+/// However, the specified instruction has multiple uses.
+/// Given this, it may actually increase register pressure to fold it
+/// into the load. For example, consider this code:
+///
+/// X = ...
+/// Y = X+1
+/// use(Y) -> nonload/store
+/// Z = Y+1
+/// load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
+/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case. This would make Y die earlier.
+bool AddressingModeMatcher::
+isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+ ExtAddrMode &AMAfter) {
+ if (IgnoreProfitability) return true;
+
+ // AMBefore is the addressing mode before this instruction was folded into it,
+ // and AMAfter is the addressing mode after the instruction was folded. Get
+ // the set of registers referenced by AMAfter and subtract out those
+ // referenced by AMBefore: this is the set of values which folding in this
+ // address extends the lifetime of.
+ //
+ // Note that there are only two potential values being referenced here,
+ // BaseReg and ScaleReg (global addresses are always available, as are any
+ // folded immediates).
+ Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+ // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+ // lifetime wasn't extended by adding this instruction.
+ if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ BaseReg = nullptr;
+ if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+ ScaledReg = nullptr;
+
+ // If folding this instruction (and its subexprs) didn't extend any live
+ // ranges, we're ok with it.
+ if (!BaseReg && !ScaledReg)
+ return true;
+
+ // If all uses of this instruction can have the address mode sunk into them,
+ // we can remove the addressing mode and effectively trade one live register
+ // for another (at worst). In this context, folding an addressing mode into
+ // the use is just a particularly nice way of sinking it.
+ SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallPtrSet<Instruction*, 16> ConsideredInsts;
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
+ return false; // Has a non-memory, non-foldable use!
+
+ // Now that we know that all uses of this instruction are part of a chain of
+ // computation involving only operations that could theoretically be folded
+ // into a memory use, loop over each of these memory operation uses and see
+ // if they could *actually* fold the instruction. The assumption is that
+ // addressing modes are cheap and that duplicating the computation involved
+ // many times is worthwhile, even on a fastpath. For sinking candidates
+ // (i.e. cold call sites), this serves as a way to prevent excessive code
+ // growth, since most architectures have some reasonably small and fast way
+ // to compute an effective address (e.g., LEA on x86).
+ SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+ for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+ Instruction *User = MemoryUses[i].first;
+ unsigned OpNo = MemoryUses[i].second;
+
+ // Get the access type of this use. If the use isn't a pointer, we don't
+ // know what it accesses.
+ Value *Address = User->getOperand(OpNo);
+ PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
+ if (!AddrTy)
+ return false;
+ Type *AddressAccessTy = AddrTy->getElementType();
+ unsigned AS = AddrTy->getAddressSpace();
+
+ // Do a match against the root of this address, ignoring profitability. This
+ // will tell us if the addressing mode for the memory operation will
+ // *actually* cover the shared instruction.
+ ExtAddrMode Result;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
+ MemoryInst, Result, InsertedInsts,
+ PromotedInsts, TPT);
+ Matcher.IgnoreProfitability = true;
+ bool Success = Matcher.matchAddr(Address, 0);
+ (void)Success; assert(Success && "Couldn't select *anything*?");
+
+ // The match was only to check profitability; the changes made are not
+ // part of the original matcher. Therefore, they should be dropped,
+ // otherwise the original matcher will not present the right state.
+ TPT.rollback(LastKnownGood);
+
+ // If the match didn't cover I, then it won't be shared by it.
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+ I) == MatchedAddrModeInsts.end())
+ return false;
+
+ MatchedAddrModeInsts.clear();
+ }
+
+ return true;
+}
+
+} // end anonymous namespace
+
+/// Return true if the specified values are defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() != BB;
+ return false;
+}
+
+/// Sink addressing mode computation immediately before MemoryInst if doing so
+/// can be done without increasing register pressure. The need for the
+/// register pressure constraint means this can end up being an all or nothing
+/// decision for all uses of the same addressing computation.
+///
+/// Load and Store Instructions often have addressing modes that can do
+/// significant amounts of computation. As such, instruction selection will try
+/// to get the load or store to do as much computation as possible for the
+/// program. The problem is that isel can only see within a single block. As
+/// such, we sink as much legal addressing mode work into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands. It's also used to sink addressing computations feeding into cold
+/// call sites into their (cold) basic block.
+///
+/// The motivation for handling sinking into cold blocks is that doing so can
+/// both enable other address mode sinking (by satisfying the register pressure
+/// constraint above), and reduce register pressure globally (by removing the
+/// addressing mode computation from the fast path entirely).
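+///
+/// Illustrative example (hypothetical IR): if the address is computed in a
+/// predecessor block,
+///   bb1:  %addr = getelementptr i32, i32* %base, i64 %idx
+///   bb2:  %v = load i32, i32* %addr
+/// the computation is re-emitted as a "sunkaddr" just before the load in bb2
+/// so instruction selection can fold it into the load's addressing mode.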
+bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ Type *AccessTy, unsigned AddrSpace) {
+ Value *Repl = Addr;
+
+ // Try to collapse single-value PHI nodes. This is necessary to undo
+ // unprofitable PRE transformations.
+ SmallVector<Value*, 8> worklist;
+ SmallPtrSet<Value*, 16> Visited;
+ worklist.push_back(Addr);
+
+ // Use a worklist to iteratively look through PHI nodes, and ensure that
+ // the addressing mode obtained from the non-PHI roots of the graph
+ // are equivalent.
+ Value *Consensus = nullptr;
+ unsigned NumUsesConsensus = 0;
+ bool IsNumUsesConsensusValid = false;
+ SmallVector<Instruction*, 16> AddrModeInsts;
+ ExtAddrMode AddrMode;
+ TypePromotionTransaction TPT;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ while (!worklist.empty()) {
+ Value *V = worklist.back();
+ worklist.pop_back();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(V).second) {
+ Consensus = nullptr;
+ break;
+ }
+
+ // For a PHI node, push all of its incoming values.
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ for (Value *IncValue : P->incoming_values())
+ worklist.push_back(IncValue);
+ continue;
+ }
+
+ // For non-PHIs, determine the addressing mode being computed. Note that
+ // the result may differ depending on what other uses our candidate
+ // addressing instructions might have.
+ SmallVector<Instruction*, 16> NewAddrModeInsts;
+ ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ InsertedInsts, PromotedInsts, TPT);
+
+ // This check is broken into two cases with very similar code to avoid using
+ // getNumUses() as much as possible. Some values have a lot of uses, so
+ // calling getNumUses() unconditionally caused a significant compile-time
+ // regression.
+ if (!Consensus) {
+ Consensus = V;
+ AddrMode = NewAddrMode;
+ AddrModeInsts = NewAddrModeInsts;
+ continue;
+ } else if (NewAddrMode == AddrMode) {
+ if (!IsNumUsesConsensusValid) {
+ NumUsesConsensus = Consensus->getNumUses();
+ IsNumUsesConsensusValid = true;
+ }
+
+ // Ensure that the obtained addressing mode is equivalent to that obtained
+ // for all other roots of the PHI traversal. Also, when choosing one
+ // such root as representative, select the one with the most uses in order
+ // to keep the cost modeling heuristics in AddressingModeMatcher
+ // applicable.
+ unsigned NumUses = V->getNumUses();
+ if (NumUses > NumUsesConsensus) {
+ Consensus = V;
+ NumUsesConsensus = NumUses;
+ AddrModeInsts = NewAddrModeInsts;
+ }
+ continue;
+ }
+
+ Consensus = nullptr;
+ break;
+ }
+
+ // If the addressing mode couldn't be determined, or if multiple different
+ // ones were determined, bail out now.
+ if (!Consensus) {
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+ TPT.commit();
+
+ // Check to see if any of the instructions subsumed by this addr mode are
+ // non-local to I's BB.
+ bool AnyNonLocal = false;
+ for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+ if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+ AnyNonLocal = true;
+ break;
+ }
+ }
+
+ // If all the instructions matched are already in this BB, don't do anything.
+ if (!AnyNonLocal) {
+ DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
+ return false;
+ }
+
+ // Insert this computation right after this user. Since our caller is
+ // scanning from the top of the BB to the bottom, any reuse of the expression
+ // is guaranteed to happen later.
+ IRBuilder<> Builder(MemoryInst);
+
+ // Now that we have determined the addressing expression we want to use and
+ // know that we have to sink it into this block, check to see if we have
+ // already done this for some other load/store instr in this block. If so,
+ // reuse the computation.
+ Value *&SunkAddr = SunkAddrs[Addr];
+ if (SunkAddr) {
+ DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst << "\n");
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ } else if (AddrSinkUsingGEPs ||
+ (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
+ TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
+ ->useAA())) {
+ // By default, we use the GEP-based method when AA is used later. This
+ // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
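+ // Illustrative shape of the sunk address (hypothetical): it is rebuilt as
+ //   %sunkaddr = getelementptr i8, i8* %base, i64 <scaled index + offset>
+ // rather than as a ptrtoint/add/inttoptr sequence, which is friendlier to AA.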
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst << "\n");
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+ Value *ResultPtr = nullptr, *ResultIndex = nullptr;
+
+ // First, find the pointer.
+ if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
+ ResultPtr = AddrMode.BaseReg;
+ AddrMode.BaseReg = nullptr;
+ }
+
+ if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
+ // We can't add more than one pointer together, nor can we scale a
+ // pointer (both of which seem meaningless).
+ if (ResultPtr || AddrMode.Scale != 1)
+ return false;
+
+ ResultPtr = AddrMode.ScaledReg;
+ AddrMode.Scale = 0;
+ }
+
+ if (AddrMode.BaseGV) {
+ if (ResultPtr)
+ return false;
+
+ ResultPtr = AddrMode.BaseGV;
+ }
+
+ // If the real base value actually came from an inttoptr, then the matcher
+ // will look through it and provide only the integer value. In that case,
+ // use it here.
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr =
+ Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr =
+ Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
+ AddrMode.Scale = 0;
+ }
+
+ if (!ResultPtr &&
+ !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ } else if (!ResultPtr) {
+ return false;
+ } else {
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+ Type *I8Ty = Builder.getInt8Ty();
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType() != IntPtrTy)
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+
+ ResultIndex = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+ } else {
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+ // the AddrMode was created we need to bail out gracefully if widths
+ // do not match instead of extending it.
+ Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
+ if (I && (ResultIndex != AddrMode.BaseReg))
+ I->eraseFromParent();
+ return false;
+ }
+
+ if (AddrMode.Scale != 1)
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
+ if (ResultIndex)
+ ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
+ else
+ ResultIndex = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (ResultIndex) {
+ // We need to add this separately from the scale above to help with
+ // SDAG consecutive load/store merging.
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ }
+
+ ResultIndex = V;
+ }
+
+ if (!ResultIndex) {
+ SunkAddr = ResultPtr;
+ } else {
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ }
+
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ }
+ } else {
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst << "\n");
+ Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+ Value *Result = nullptr;
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType()->isPointerTy())
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+ if (V->getType() != IntPtrTy)
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+ Result = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (V->getType()->isPointerTy()) {
+ V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+ } else {
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+ // the AddrMode was created we need to bail out gracefully if widths
+ // do not match instead of extending it.
+ Instruction *I = dyn_cast_or_null<Instruction>(Result);
+ if (I && (Result != AddrMode.BaseReg))
+ I->eraseFromParent();
+ return false;
+ }
+ if (AddrMode.Scale != 1)
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ // Add in the BaseGV if present.
+ if (AddrMode.BaseGV) {
+ Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (Result)
+ Result = Builder.CreateAdd(Result, V, "sunkaddr");
+ else
+ Result = V;
+ }
+
+ if (!Result)
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ else
+ SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
+ }
+
+ MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+
+ // If we have no uses, recursively delete the value and all dead instructions
+ // using it.
+ if (Repl->use_empty()) {
+ // This can cause recursive deletion, which can invalidate our iterator.
+ // Use a WeakVH to hold onto it in case this happens.
+ Value *CurValue = &*CurInstIterator;
+ WeakVH IterHandle(CurValue);
+ BasicBlock *BB = CurInstIterator->getParent();
+
+ RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
+
+ if (IterHandle != CurValue) {
+ // If the iterator instruction was recursively deleted, start over at the
+ // start of the block.
+ CurInstIterator = BB->begin();
+ SunkAddrs.clear();
+ }
+ }
+ ++NumMemoryInsts;
+ return true;
+}
+
+/// If there are any memory operands, use optimizeMemoryInst to sink their
+/// address computing into the block when possible / profitable.
+bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
+ bool MadeChange = false;
+
+ const TargetRegisterInfo *TRI =
+ TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI->ParseConstraints(*DL, TRI, CS);
+ unsigned ArgNo = 0;
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.isIndirect) {
+ Value *OpVal = CS->getArgOperand(ArgNo++);
+ MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
+ } else if (OpInfo.Type == InlineAsm::isInput)
+ ArgNo++;
+ }
+
+ return MadeChange;
+}
+
+/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// sign extensions.
+static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
+ assert(!Inst->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+ bool IsSExt = isa<SExtInst>(FirstUser);
+ Type *ExtTy = FirstUser->getType();
+ for (const User *U : Inst->users()) {
+ const Instruction *UI = cast<Instruction>(U);
+ if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
+ return false;
+ Type *CurTy = UI->getType();
+ // Same input and output types: Same instruction after CSE.
+ if (CurTy == ExtTy)
+ continue;
+
+ // If IsSExt is true, we are in this situation:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty1 a to ty3
+ // Assuming ty2 is shorter than ty3, this could be turned into:
+ // a = Inst
+ // b = sext ty1 a to ty2
+ // c = sext ty2 b to ty3
+ // However, the last sext is not free.
+ if (IsSExt)
+ return false;
+
+ // This is a ZExt; maybe extending from one type to the other is free.
+ // In that case, we would not account for a different use.
+ Type *NarrowTy;
+ Type *LargeTy;
+ if (ExtTy->getScalarType()->getIntegerBitWidth() >
+ CurTy->getScalarType()->getIntegerBitWidth()) {
+ NarrowTy = CurTy;
+ LargeTy = ExtTy;
+ } else {
+ NarrowTy = ExtTy;
+ LargeTy = CurTy;
+ }
+
+ if (!TLI.isZExtFree(NarrowTy, LargeTy))
+ return false;
+ }
+ // All uses are the same or can be derived from one another for free.
+ return true;
+}
+
+/// \brief Try to form ExtLd by promoting \p Exts until they reach a
+/// load instruction.
+/// If an ext(load) can be formed, it is returned via \p LI for the load
+/// and \p Inst for the extension.
+/// Otherwise LI == nullptr and Inst == nullptr.
+/// When some promotion happened, \p TPT contains the proper state to
+/// revert them.
+///
+/// \return true when promoting was necessary to expose the ext(load)
+/// opportunity, false otherwise.
+///
+/// Example:
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Thanks to the promotion, we can match zext(load i32*) to i64.
+bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
+ LoadInst *&LI, Instruction *&Inst,
+ const SmallVectorImpl<Instruction *> &Exts,
+ unsigned CreatedInstsCost = 0) {
+ // Iterate over all the extensions to see if any of them forms an ext(load).
+ for (auto I : Exts) {
+ // Check if we directly have ext(load).
+ if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
+ Inst = I;
+ // No promotion happened here.
+ return false;
+ }
+ // Check whether or not we want to do any promotion.
+ if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+ continue;
+ // Get the action to perform the promotion.
+ TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+ I, InsertedInsts, *TLI, PromotedInsts);
+ // Check if we can promote.
+ if (!TPH)
+ continue;
+ // Save the current state.
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 4> NewExts;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
+ // Promote.
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
+ assert(PromotedVal &&
+ "TypePromotionHelper should have filtered out those cases");
+
+ // We would be able to merge only one extension into a load.
+ // Therefore, if we have more than 1 new extension we heuristically
+ // cut this search path, because it means we degrade the code quality.
+ // With exactly 2, the transformation is neutral, because we will merge
+ // one extension but leave one. However, we optimistically keep going,
+ // because the new extension may be removed too.
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ TotalCreatedInstsCost -= ExtCost;
+ if (!StressExtLdPromotion &&
+ (TotalCreatedInstsCost > 1 ||
+ !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
+ // The promotion is not profitable, rollback to the previous state.
+ TPT.rollback(LastKnownGood);
+ continue;
+ }
+ // The promotion is profitable.
+ // Check if it exposes an ext(load).
+ (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+ // If we have created a new extension, i.e., we now have two
+ // extensions, we must make sure one of them is merged with
+ // the load; otherwise we may degrade the code quality.
+ (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
+ // Promotion happened.
+ return true;
+ // If this does not help to expose an ext(load) then, rollback.
+ TPT.rollback(LastKnownGood);
+ }
+ // None of the extensions can form an ext(load).
+ LI = nullptr;
+ Inst = nullptr;
+ return false;
+}
+
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
+/// \p I[in/out] the extension may be modified during the process if some
+/// promotions apply.
+///
+bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
+ // Try to promote a chain of computation if it allows us to form
+ // an extended load.
+ TypePromotionTransaction TPT;
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ Exts.push_back(I);
+ // Look for a load being extended.
+ LoadInst *LI = nullptr;
+ Instruction *OldExt = I;
+ bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
+ if (!LI || !I) {
+ assert(!HasPromoted && !LI && "If we did not match any load instruction "
+ "the code must remain the same");
+ I = OldExt;
+ return false;
+ }
+
+ // If they're already in the same block, there's nothing to do.
+ // Make the cheap checks first if we did not promote.
+ // If we promoted, we need to check if it is indeed profitable.
+ if (!HasPromoted && LI->getParent() == I->getParent())
+ return false;
+
+ EVT VT = TLI->getValueType(*DL, I->getType());
+ EVT LoadVT = TLI->getValueType(*DL, LI->getType());
+
+ // If the load has other users and the truncate is not free, this probably
+ // isn't worthwhile.
+ if (!LI->hasOneUse() && TLI &&
+ (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+ !TLI->isTruncateFree(I->getType(), LI->getType())) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+
+ // Check whether the target supports casts folded into loads.
+ unsigned LType;
+ if (isa<ZExtInst>(I))
+ LType = ISD::ZEXTLOAD;
+ else {
+ assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ LType = ISD::SEXTLOAD;
+ }
+ if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
+ I = OldExt;
+ TPT.rollback(LastKnownGood);
+ return false;
+ }
+
+ // Move the extend into the same block as the load, so that SelectionDAG
+ // can fold it.
+ TPT.commit();
+ I->removeFromParent();
+ I->insertAfter(LI);
+ ++NumExtsMoved;
+ return true;
+}
+
+bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
+ BasicBlock *DefBB = I->getParent();
+
+ // If the result of a {s|z}ext and its source are both live out, rewrite all
+ // other uses of the source with the result of the extension.
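+ // Illustrative example (hypothetical IR): for
+ //   bb0: %s = ...
+ //        %e = zext i32 %s to i64
+ //   bb1: use of %s
+ // we insert "%t = trunc i64 %e to i32" in bb1 and rewrite that use of %s,
+ // so only %e has to stay live across the block boundary.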
+ Value *Src = I->getOperand(0);
+ if (Src->hasOneUse())
+ return false;
+
+ // Only do this xform if truncating is free.
+ if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+ return false;
+
+ // Only safe to perform the optimization if the source is also defined in
+ // this block.
+ if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+ return false;
+
+ bool DefIsLiveOut = false;
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = UI->getParent();
+ if (UserBB == DefBB) continue;
+ DefIsLiveOut = true;
+ break;
+ }
+ if (!DefIsLiveOut)
+ return false;
+
+ // Make sure none of the uses are PHI nodes.
+ for (User *U : Src->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ BasicBlock *UserBB = UI->getParent();
+ if (UserBB == DefBB) continue;
+ // Be conservative. We don't want this xform to end up introducing
+ // reloads just before load / store instructions.
+ if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
+ return false;
+ }
+
+ // InsertedTruncs - Only insert one trunc in each block.
+ DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+ bool MadeChange = false;
+ for (Use &U : Src->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ // Figure out which BB this ext is used in.
+ BasicBlock *UserBB = User->getParent();
+ if (UserBB == DefBB) continue;
+
+ // Both src and def are live in this block. Rewrite the use.
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+ if (!InsertedTrunc) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
+ InsertedInsts.insert(InsertedTrunc);
+ }
+
+ // Replace a use of the {s|z}ext source with a use of the result.
+ U = InsertedTrunc;
+ ++NumExtUses;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+// Find loads whose uses only use some of the loaded value's bits. Add an "and"
+// just after the load if the target can fold this into one extload instruction,
+// with the hope of eliminating some of the other later "and" instructions using
+// the loaded value. "and"s that are made trivially redundant by the insertion
+// of the new "and" are removed by this function, while others (e.g. those whose
+// path from the load goes through a phi) are left for isel to potentially
+// remove.
+//
+// For example:
+//
+// b0:
+// x = load i32
+// ...
+// b1:
+// y = and x, 0xff
+// z = use y
+//
+// becomes:
+//
+// b0:
+// x = load i32
+// x' = and x, 0xff
+// ...
+// b1:
+// z = use x'
+//
+// whereas:
+//
+// b0:
+// x1 = load i32
+// ...
+// b1:
+// x2 = load i32
+// ...
+// b2:
+// x = phi x1, x2
+// y = and x, 0xff
+//
+// becomes (after a call to optimizeLoadExt for each load):
+//
+// b0:
+// x1 = load i32
+// x1' = and x1, 0xff
+// ...
+// b1:
+// x2 = load i32
+// x2' = and x2, 0xff
+// ...
+// b2:
+// x = phi x1', x2'
+// y = and x, 0xff
+//
+
+bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
+
+ if (!Load->isSimple() ||
+ !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
+ return false;
+
+ // Skip loads we've already transformed or have no reason to transform.
+ if (Load->hasOneUse()) {
+ User *LoadUser = *Load->user_begin();
+ if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
+ !isa<PHINode>(LoadUser))
+ return false;
+ }
+
+ // Look at all uses of Load, looking through phis, to determine how many bits
+ // of the loaded value are needed.
+ SmallVector<Instruction *, 8> WorkList;
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 8> AndsToMaybeRemove;
+ for (auto *U : Load->users())
+ WorkList.push_back(cast<Instruction>(U));
+
+ EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
+ unsigned BitWidth = LoadResultVT.getSizeInBits();
+ APInt DemandBits(BitWidth, 0);
+ APInt WidestAndBits(BitWidth, 0);
+
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.back();
+ WorkList.pop_back();
+
+ // Break use-def graph loops.
+ if (!Visited.insert(I).second)
+ continue;
+
+ // For a PHI node, push all of its users.
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ for (auto *U : Phi->users())
+ WorkList.push_back(cast<Instruction>(U));
+ continue;
+ }
+
+ switch (I->getOpcode()) {
+ case llvm::Instruction::And: {
+ auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!AndC)
+ return false;
+ APInt AndBits = AndC->getValue();
+ DemandBits |= AndBits;
+ // Keep track of the widest and mask we see.
+ if (AndBits.ugt(WidestAndBits))
+ WidestAndBits = AndBits;
+ if (AndBits == WidestAndBits && I->getOperand(0) == Load)
+ AndsToMaybeRemove.push_back(I);
+ break;
+ }
+
+ case llvm::Instruction::Shl: {
+ auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!ShlC)
+ return false;
+ uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
+ auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt);
+ DemandBits |= ShlDemandBits;
+ break;
+ }
+
+ case llvm::Instruction::Trunc: {
+ EVT TruncVT = TLI->getValueType(*DL, I->getType());
+ unsigned TruncBitWidth = TruncVT.getSizeInBits();
+ auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth);
+ DemandBits |= TruncBits;
+ break;
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ uint32_t ActiveBits = DemandBits.getActiveBits();
+ // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
+ // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
+ // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
+ // (and (load x) 1) is not matched as a single instruction, rather as a LDR
+ // followed by an AND.
+ // TODO: Look into removing this restriction by fixing backends to either
+ // return false for isLoadExtLegal for i1 or have them select this pattern to
+ // a single instruction.
+ //
+ // Also avoid hoisting if we didn't see any ands with the exact DemandBits
+ // mask, since these are the only ands that will be removed by isel.
+ if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ WidestAndBits != DemandBits)
+ return false;
+
+ LLVMContext &Ctx = Load->getType()->getContext();
+ Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
+ EVT TruncVT = TLI->getValueType(*DL, TruncTy);
+
+ // Reject cases that won't be matched as extloads.
+ if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
+ !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
+ return false;
+
+ IRBuilder<> Builder(Load->getNextNode());
+ auto *NewAnd = dyn_cast<Instruction>(
+ Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+
+ // Replace all uses of load with new and (except for the use of load in the
+ // new and itself).
+ Load->replaceAllUsesWith(NewAnd);
+ NewAnd->setOperand(0, Load);
+
+ // Remove any and instructions that are now redundant.
+ for (auto *And : AndsToMaybeRemove)
+ // Check that the and mask is the same as the one we decided to put on the
+ // new and.
+ if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
+ And->replaceAllUsesWith(NewAnd);
+ if (&*CurInstIterator == And)
+ CurInstIterator = std::next(And->getIterator());
+ And->eraseFromParent();
+ ++NumAndUses;
+ }
+
+ ++NumAndsAdded;
+ return true;
+}
+
+/// Check if V (an operand of a select instruction) is an expensive instruction
+/// that is only used once.
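+/// E.g. (illustrative), a single-use floating-point division feeding only one
+/// arm of the select is a typical candidate, provided the target's cost model
+/// reports it as expensive: it can then be sunk into the matching branch and
+/// skipped on the other path.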
+static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ // If it's safe to speculatively execute, then it should not have side
+ // effects; therefore, it's safe to sink and possibly *not* execute.
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
+ TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
+}
+
+/// Returns true if a SelectInst should be turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
+ const TargetLowering *TLI,
+ SelectInst *SI) {
+ // If even a predictable select is cheap, then a branch can't be cheaper.
+ if (!TLI->isPredictableSelectExpensive())
+ return false;
+
+ // FIXME: This should use the same heuristics as IfConversion to determine
+ // whether a select is better represented as a branch.
+
+ // If metadata tells us that the select condition is obviously predictable,
+ // then we want to replace the select with a branch.
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TLI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
+
+ CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+ // If a branch is predictable, an out-of-order CPU can avoid blocking on its
+ // comparison condition. If the compare has more than one use, there's
+ // probably another cmov or setcc around, so it's not worth emitting a branch.
+ if (!Cmp || !Cmp->hasOneUse())
+ return false;
+
+ // If either operand of the select is expensive and only needed on one side
+ // of the select, we should form a branch.
+ if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
+ sinkSelectOperand(TTI, SI->getFalseValue()))
+ return true;
+
+ return false;
+}
+
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
+bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+
+ // Can we convert the 'select' to a branch (control flow)?
+ if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
+ SI->getMetadata(LLVMContext::MD_unpredictable))
+ return false;
+
+ TargetLowering::SelectSupportKind SelectKind;
+ if (VectorCond)
+ SelectKind = TargetLowering::VectorMaskSelect;
+ else if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+
+ if (TLI->isSelectSupported(SelectKind) &&
+ !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+ return false;
+
+ ModifiedDT = true;
+
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // br i1 %cmp, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // In addition, we may sink instructions that produce %c or %d from
+ // the entry block into the destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
+ // First, we split the block containing the select into 2 blocks.
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+
+ // Delete the unconditional branch that was just created by the split.
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr;
+ BasicBlock *FalseBlock = nullptr;
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+ TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+ FalseInst->moveBefore(FalseBranch);
+ }
+
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ BranchInst::Create(EndBlock, FalseBlock);
+ }
+
+ // Insert the real conditional branch based on the original condition.
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ if (TrueBlock == nullptr) {
+ BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+ FalseBlock = StartBlock;
+ } else {
+ BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+ }
+
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(SI->getTrueValue(), TrueBlock);
+ PN->addIncoming(SI->getFalseValue(), FalseBlock);
+
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+
+ // Instruct OptimizeBlock to skip to the next block.
+ CurInstIterator = StartBlock->end();
+ ++NumSelectsExpanded;
+ return true;
+}
+
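+/// Return true if the shuffle mask reads a single source lane, e.g. a mask of
+/// <0, 0, 0, 0> or <1, undef, 1, 1> (illustrative), i.e. a broadcast/splat.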
+static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
+ SmallVector<int, 16> Mask(SVI->getShuffleMask());
+ int SplatElem = -1;
+ for (unsigned i = 0; i < Mask.size(); ++i) {
+ if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
+ return false;
+ SplatElem = Mask[i];
+ }
+
+ return true;
+}
+
+/// Some targets have expensive vector shifts if the lanes aren't all the same
+/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
+/// it's often worth sinking a shufflevector splat down to its use so that
+/// codegen can spot all lanes are identical.
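+///
+/// Illustrative example (hypothetical IR): a splat such as
+///   %amt = shufflevector <4 x i32> %v, <4 x i32> undef,
+///                        <4 x i32> zeroinitializer
+/// that is used by a shift in another block is re-emitted in that block, so
+/// the backend can see that all the shift amounts are identical.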
+bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+ BasicBlock *DefBB = SVI->getParent();
+
+ // Only do this xform if variable vector shifts are particularly expensive.
+ if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
+ return false;
+
+ // We only expect better codegen by sinking a shuffle if we can recognise a
+ // constant splat.
+ if (!isBroadcastShuffle(SVI))
+ return false;
+
+ // InsertedShuffles - Only insert a shuffle in each block once.
+ DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
+
+ bool MadeChange = false;
+ for (User *U : SVI->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Figure out which BB this shuffle is used in.
+ BasicBlock *UserBB = UI->getParent();
+ if (UserBB == DefBB) continue;
+
+ // For now only apply this when the splat is used by a shift instruction.
+ if (!UI->isShift()) continue;
+
+ // Everything checks out, sink the shuffle if the user's block doesn't
+ // already have a copy.
+ Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
+
+ if (!InsertedShuffle) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+ assert(InsertPt != UserBB->end());
+ InsertedShuffle =
+ new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
+ SVI->getOperand(2), "", &*InsertPt);
+ }
+
+ UI->replaceUsesOfWith(SVI, InsertedShuffle);
+ MadeChange = true;
+ }
+
+ // If we removed all uses, nuke the shuffle.
+ if (SVI->use_empty()) {
+ SVI->eraseFromParent();
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ if (!TLI || !DL)
+ return false;
+
+ Value *Cond = SI->getCondition();
+ Type *OldType = Cond->getType();
+ LLVMContext &Context = Cond->getContext();
+ MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+ unsigned RegWidth = RegType.getSizeInBits();
+
+ if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+ return false;
+
+ // If the register width is greater than the type width, expand the condition
+ // of the switch instruction and each case constant to the width of the
+ // register. By widening the type of the switch condition, subsequent
+ // comparisons (for case comparisons) will not need to be extended to the
+ // preferred register width, so we will potentially eliminate N-1 extends,
+ // where N is the number of cases in the switch.
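+ // Illustrative example (hypothetical IR, assuming a 64-bit register width):
+ //   switch i8 %c, label %def [ i8 1, label %a
+ //                              i8 2, label %b ]
+ // becomes
+ //   %wide = zext i8 %c to i64
+ //   switch i64 %wide, label %def [ i64 1, label %a
+ //                                  i64 2, label %b ]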
+ auto *NewType = Type::getIntNTy(Context, RegWidth);
+
+ // Zero-extend the switch condition and case constants unless the switch
+ // condition is a function argument that is already being sign-extended.
+ // In that case, we can avoid an unnecessary mask/extension by sign-extending
+ // everything instead.
+ Instruction::CastOps ExtType = Instruction::ZExt;
+ if (auto *Arg = dyn_cast<Argument>(Cond))
+ if (Arg->hasSExtAttr())
+ ExtType = Instruction::SExt;
+
+ auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
+ ExtInst->insertBefore(SI);
+ SI->setCondition(ExtInst);
+ for (SwitchInst::CaseIt Case : SI->cases()) {
+ APInt NarrowConst = Case.getCaseValue()->getValue();
+ APInt WideConst = (ExtType == Instruction::ZExt) ?
+ NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
+ Case.setValue(ConstantInt::get(Context, WideConst));
+ }
+
+ return true;
+}
+
+namespace {
+/// \brief Helper class to promote a scalar operation to a vector one.
+/// This class is used to move an extractelement transition downward.
+/// E.g.,
+/// a = vector_op <2 x i32>
+/// b = extractelement <2 x i32> a, i32 0
+/// c = scalar_op b
+/// store c
+///
+/// =>
+/// a = vector_op <2 x i32>
+/// c = vector_op a (equivalent to scalar_op on the related lane)
+/// * d = extractelement <2 x i32> c, i32 0
+/// * store d
+/// Assuming both extractelement and store can be combined, we get rid of the
+/// transition.
+class VectorPromoteHelper {
+ /// DataLayout associated with the current module.
+ const DataLayout &DL;
+
+ /// Used to perform some checks on the legality of vector operations.
+ const TargetLowering &TLI;
+
+ /// Used to estimate the cost of the promoted chain.
+ const TargetTransformInfo &TTI;
+
+ /// The transition being moved downwards.
+ Instruction *Transition;
+ /// The sequence of instructions to be promoted.
+ SmallVector<Instruction *, 4> InstsToBePromoted;
+ /// Cost of combining a store and an extract.
+ unsigned StoreExtractCombineCost;
+ /// Instruction that will be combined with the transition.
+ Instruction *CombineInst;
+
+ /// \brief The instruction that represents the current end of the transition.
+ /// Since we are faking the promotion until we reach the end of the chain
+ /// of computation, we need a way to get the current end of the transition.
+ Instruction *getEndOfTransition() const {
+ if (InstsToBePromoted.empty())
+ return Transition;
+ return InstsToBePromoted.back();
+ }
+
+ /// \brief Return the index of the original value in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
+ /// c, is at index 0.
+ unsigned getTransitionOriginalValueIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 0;
+ }
+
+ /// \brief Return the index of the index in the transition.
+ /// E.g., for "extractelement <2 x i32> c, i32 0" the index
+ /// is at index 1.
+ unsigned getTransitionIdx() const {
+ assert(isa<ExtractElementInst>(Transition) &&
+ "Other kind of transitions are not supported yet");
+ return 1;
+ }
+
+ /// \brief Get the type of the transition.
+ /// This is the type of the original value.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
+ /// transition is <2 x i32>.
+ Type *getTransitionType() const {
+ return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
+ }
+
+ /// \brief Promote \p ToBePromoted by moving \p Def downward past it.
+ /// I.e., we have the following sequence:
+ /// Def = Transition <ty1> a to <ty2>
+ /// b = ToBePromoted <ty2> Def, ...
+ /// =>
+ /// b = ToBePromoted <ty1> a, ...
+ /// Def = Transition <ty1> ToBePromoted to <ty2>
+ void promoteImpl(Instruction *ToBePromoted);
+
+ /// \brief Check whether or not it is profitable to promote all the
+ /// instructions enqueued to be promoted.
+ bool isProfitableToPromote() {
+ Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
+ unsigned Index = isa<ConstantInt>(ValIdx)
+ ? cast<ConstantInt>(ValIdx)->getZExtValue()
+ : -1;
+ Type *PromotedType = getTransitionType();
+
+ StoreInst *ST = cast<StoreInst>(CombineInst);
+ unsigned AS = ST->getPointerAddressSpace();
+ unsigned Align = ST->getAlignment();
+ // Check if this store is supported.
+ if (!TLI.allowsMisalignedMemoryAccesses(
+ TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
+ Align)) {
+ // If this is not supported, there is no way we can combine
+ // the extract with the store.
+ return false;
+ }
+
+ // The scalar chain of computation has to pay for the transition
+ // scalar to vector.
+ // The vector chain has to account for the combining cost.
+ uint64_t ScalarCost =
+ TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
+ uint64_t VectorCost = StoreExtractCombineCost;
+ for (const auto &Inst : InstsToBePromoted) {
+ // Compute the cost.
+ // By construction, all instructions being promoted are arithmetic ones.
+ // Moreover, one argument is a constant that can be viewed as a splat
+ // constant.
+ Value *Arg0 = Inst->getOperand(0);
+ bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
+ isa<ConstantFP>(Arg0);
+ TargetTransformInfo::OperandValueKind Arg0OVK =
+ IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+ : TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Arg1OVK =
+ !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+ : TargetTransformInfo::OK_AnyValue;
+ ScalarCost += TTI.getArithmeticInstrCost(
+ Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+ VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+ Arg0OVK, Arg1OVK);
+ }
+ DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+ << ScalarCost << "\nVector: " << VectorCost << '\n');
+ return ScalarCost > VectorCost;
+ }
+
+ /// \brief Generate a constant vector with \p Val with the same
+ /// number of elements as the transition.
+ /// \p UseSplat defines whether or not \p Val should be replicated
+ /// across the whole vector.
+ /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
+ /// otherwise we generate a vector with as many undef as possible:
+ /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
+ /// used at the index of the extract.
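+ /// E.g. (illustrative), for a <4 x i32> transition extracting index 1 and
+ /// Val == 7, UseSplat yields <7, 7, 7, 7>; otherwise we generate
+ /// <undef, 7, undef, undef>.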
+ Value *getConstantVector(Constant *Val, bool UseSplat) const {
+ unsigned ExtractIdx = UINT_MAX;
+ if (!UseSplat) {
+ // If we cannot determine where the constant must be, we have to
+ // use a splat constant.
+ Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
+ if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
+ ExtractIdx = CstVal->getSExtValue();
+ else
+ UseSplat = true;
+ }
+
+ unsigned End = getTransitionType()->getVectorNumElements();
+ if (UseSplat)
+ return ConstantVector::getSplat(End, Val);
+
+ SmallVector<Constant *, 4> ConstVec;
+ UndefValue *UndefVal = UndefValue::get(Val->getType());
+ for (unsigned Idx = 0; Idx != End; ++Idx) {
+ if (Idx == ExtractIdx)
+ ConstVec.push_back(Val);
+ else
+ ConstVec.push_back(UndefVal);
+ }
+ return ConstantVector::get(ConstVec);
+ }
+
+ /// \brief Check if promoting the operand at \p OperandIdx of \p Use to a
+ /// vector type can trigger undefined behavior.
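+ /// E.g. (illustrative), if the promoted value ends up as the divisor of a
+ /// udiv, the other lanes of the vector divisor may be zero or undef, which
+ /// would introduce undefined behavior.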
+ static bool canCauseUndefinedBehavior(const Instruction *Use,
+ unsigned OperandIdx) {
+ // It is not safe to introduce undef when the operand is on
+ // the right hand side of a division-like instruction.
+ if (OperandIdx != 1)
+ return false;
+ switch (Use->getOpcode()) {
+ default:
+ return false;
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return true;
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return !Use->hasNoNaNs();
+ }
+ llvm_unreachable(nullptr);
+ }
+
+public:
+ VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
+ const TargetTransformInfo &TTI, Instruction *Transition,
+ unsigned CombineCost)
+ : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
+ StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
+ assert(Transition && "Do not know how to promote null");
+ }
+
+ /// \brief Check if we can promote \p ToBePromoted to the transition type.
+ bool canPromote(const Instruction *ToBePromoted) const {
+ // We could support CastInst too.
+ return isa<BinaryOperator>(ToBePromoted);
+ }
+
+ /// \brief Check if it is profitable to promote \p ToBePromoted
+ /// by moving the transition downward through it.
+ bool shouldPromote(const Instruction *ToBePromoted) const {
+ // Promote only if all the operands can be statically expanded.
+ // Indeed, we do not want to introduce any new kind of transitions.
+ for (const Use &U : ToBePromoted->operands()) {
+ const Value *Val = U.get();
+ if (Val == getEndOfTransition()) {
+ // If the use is a division and the transition is on the rhs,
+ // we cannot promote the operation, otherwise we may create a
+ // division by zero.
+ if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
+ return false;
+ continue;
+ }
+ if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
+ !isa<ConstantFP>(Val))
+ return false;
+ }
+ // Check that the resulting operation is legal.
+ int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
+ if (!ISDOpcode)
+ return false;
+ return StressStoreExtract ||
+ TLI.isOperationLegalOrCustom(
+ ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
+ }
+
+ /// \brief Check whether or not \p Use can be combined
+ /// with the transition.
+ /// I.e., is it possible to do Use(Transition) => AnotherUse?
+ bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
+
+ /// \brief Record \p ToBePromoted as part of the chain to be promoted.
+ void enqueueForPromotion(Instruction *ToBePromoted) {
+ InstsToBePromoted.push_back(ToBePromoted);
+ }
+
+ /// \brief Set the instruction that will be combined with the transition.
+ void recordCombineInstruction(Instruction *ToBeCombined) {
+ assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
+ CombineInst = ToBeCombined;
+ }
+
+ /// \brief Promote all the instructions enqueued for promotion if it is
+ /// profitable.
+ /// \return True if the promotion happened, false otherwise.
+ bool promote() {
+ // Check if there is something to promote.
+ // Right now, if we do not have anything to combine with,
+ // we assume the promotion is not profitable.
+ if (InstsToBePromoted.empty() || !CombineInst)
+ return false;
+
+ // Check cost.
+ if (!StressStoreExtract && !isProfitableToPromote())
+ return false;
+
+ // Promote.
+ for (auto &ToBePromoted : InstsToBePromoted)
+ promoteImpl(ToBePromoted);
+ InstsToBePromoted.clear();
+ return true;
+ }
+};
+} // End of anonymous namespace.
+
+void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
+ // At this point, we know that all the operands of ToBePromoted but Def
+ // can be statically promoted.
+ // For Def, we need to use its parameter in ToBePromoted:
+ // b = ToBePromoted ty1 a
+ // Def = Transition ty1 b to ty2
+ // Move the transition down.
+ // 1. Replace all uses of the promoted operation by the transition.
+ // = ... b => = ... Def.
+ assert(ToBePromoted->getType() == Transition->getType() &&
+ "The type of the result of the transition does not match "
+ "the final type");
+ ToBePromoted->replaceAllUsesWith(Transition);
+ // 2. Update the type of the uses.
+ // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
+ Type *TransitionTy = getTransitionType();
+ ToBePromoted->mutateType(TransitionTy);
+ // 3. Update all the operands of the promoted operation with promoted
+ // operands.
+ // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
+ for (Use &U : ToBePromoted->operands()) {
+ Value *Val = U.get();
+ Value *NewVal = nullptr;
+ if (Val == Transition)
+ NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
+ else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
+ isa<ConstantFP>(Val)) {
+ // Use a splat constant if it is not safe to use undef.
+ NewVal = getConstantVector(
+ cast<Constant>(Val),
+ isa<UndefValue>(Val) ||
+ canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
+ } else
+ llvm_unreachable("Did you modify shouldPromote and forget to update "
+ "this?");
+ ToBePromoted->setOperand(U.getOperandNo(), NewVal);
+ }
+ Transition->removeFromParent();
+ Transition->insertAfter(ToBePromoted);
+ Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
+}
+
+/// Some targets can do store(extractelement) with one instruction.
+/// Try to push the extractelement towards the stores when the target
+/// has this feature and this is profitable.
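+/// As a purely illustrative sketch (hypothetical IR, assuming a <2 x i32>
+/// input and a target that folds extract + store), a chain such as
+/// \code
+///   %e = extractelement <2 x i32> %v, i32 1
+///   %a = add i32 %e, 7
+///   store i32 %a, i32* %p
+/// \endcode
+/// may be rewritten so the extract sits right next to the store:
+/// \code
+///   %v.a = add <2 x i32> %v, <i32 undef, i32 7>
+///   %e = extractelement <2 x i32> %v.a, i32 1
+///   store i32 %e, i32* %p
+/// \endcode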
+bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
+ unsigned CombineCost = UINT_MAX;
+ if (DisableStoreExtract || !TLI ||
+ (!StressStoreExtract &&
+ !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
+ Inst->getOperand(1), CombineCost)))
+ return false;
+
+ // At this point we know that Inst is a vector to scalar transition.
+ // Try to move it down the def-use chain, until:
+ // - We can combine the transition with its single use
+ // => we got rid of the transition.
+ // - We escape the current basic block
+ //    => we would need to check that we are moving it to a cheaper place and
+ // we do not do that for now.
+ BasicBlock *Parent = Inst->getParent();
+ DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
+ VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
+ // If the transition has more than one use, assume this is not going to be
+ // beneficial.
+ while (Inst->hasOneUse()) {
+ Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
+ DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
+
+ if (ToBePromoted->getParent() != Parent) {
+ DEBUG(dbgs() << "Instruction to promote is in a different block ("
+ << ToBePromoted->getParent()->getName()
+ << ") than the transition (" << Parent->getName() << ").\n");
+ return false;
+ }
+
+ if (VPH.canCombine(ToBePromoted)) {
+ DEBUG(dbgs() << "Assume " << *Inst << '\n'
+ << "will be combined with: " << *ToBePromoted << '\n');
+ VPH.recordCombineInstruction(ToBePromoted);
+ bool Changed = VPH.promote();
+ NumStoreExtractExposed += Changed;
+ return Changed;
+ }
+
+ DEBUG(dbgs() << "Try promoting.\n");
+ if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
+ return false;
+
+ DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
+
+ VPH.enqueueForPromotion(ToBePromoted);
+ Inst = ToBePromoted;
+ }
+ return false;
+}
+
+bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
+ // Bail out if we inserted the instruction to prevent optimizations from
+ // stepping on each other's toes.
+ if (InsertedInsts.count(I))
+ return false;
+
+ if (PHINode *P = dyn_cast<PHINode>(I)) {
+ // It is possible for very late stage optimizations (such as SimplifyCFG)
+ // to introduce PHI nodes too late to be cleaned up. If we detect such a
+ // trivial PHI, go ahead and zap it here.
+ if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) {
+ P->replaceAllUsesWith(V);
+ P->eraseFromParent();
+ ++NumPHIsElim;
+ return true;
+ }
+ return false;
+ }
+
+ if (CastInst *CI = dyn_cast<CastInst>(I)) {
+ // If the source of the cast is a constant, then this should have
+ // already been constant folded. The only reason NOT to constant fold
+ // it is if something (e.g. LSR) was careful to place the constant
+ // evaluation in a block other than the one that uses it (e.g. to hoist
+ // the address of globals out of a loop). If this is the case, we don't
+ // want to forward-subst the cast.
+ if (isa<Constant>(CI->getOperand(0)))
+ return false;
+
+ if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
+ return true;
+
+ if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+ // Sink a zext or sext into its user blocks if the target type doesn't
+ // fit in one register.
+ if (TLI &&
+ TLI->getTypeAction(CI->getContext(),
+ TLI->getValueType(*DL, CI->getType())) ==
+ TargetLowering::TypeExpandInteger) {
+ return SinkCast(CI);
+ } else {
+ bool MadeChange = moveExtToFormExtLoad(I);
+ return MadeChange | optimizeExtUses(I);
+ }
+ }
+ return false;
+ }
+
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (!TLI || !TLI->hasMultipleConditionRegisters())
+ return OptimizeCmpExpression(CI, TLI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ stripInvariantGroupMetadata(*LI);
+ if (TLI) {
+ bool Modified = optimizeLoadExt(LI);
+ unsigned AS = LI->getPointerAddressSpace();
+ Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
+ return Modified;
+ }
+ return false;
+ }
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ stripInvariantGroupMetadata(*SI);
+ if (TLI) {
+ unsigned AS = SI->getPointerAddressSpace();
+ return optimizeMemoryInst(I, SI->getOperand(1),
+ SI->getOperand(0)->getType(), AS);
+ }
+ return false;
+ }
+
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+ if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+ BinOp->getOpcode() == Instruction::LShr)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (TLI && CI && TLI->hasExtractBitsInsn())
+ return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
+
+ return false;
+ }
+
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ if (GEPI->hasAllZeroIndices()) {
+ // The GEP operand must be a pointer, so must its result -> BitCast.
+ Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+ GEPI->getName(), GEPI);
+ GEPI->replaceAllUsesWith(NC);
+ GEPI->eraseFromParent();
+ ++NumGEPsElim;
+ optimizeInst(NC, ModifiedDT);
+ return true;
+ }
+ return false;
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ return optimizeCallInst(CI, ModifiedDT);
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(I))
+ return optimizeSelectInst(SI);
+
+ if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
+ return optimizeShuffleVectorInst(SVI);
+
+ if (auto *Switch = dyn_cast<SwitchInst>(I))
+ return optimizeSwitchInst(Switch);
+
+ if (isa<ExtractElementInst>(I))
+ return optimizeExtractElementInst(I);
+
+ return false;
+}
+
+/// Given an OR instruction, check to see if this is a bitreverse
+/// idiom. If so, insert the new intrinsic and return true.
+static bool makeBitReverse(Instruction &I, const DataLayout &DL,
+ const TargetLowering &TLI) {
+ if (!I.getType()->isIntegerTy() ||
+ !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
+ TLI.getValueType(DL, I.getType(), true)))
+ return false;
+
+ SmallVector<Instruction*, 4> Insts;
+ if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
+ return false;
+ Instruction *LastInst = Insts.back();
+ I.replaceAllUsesWith(LastInst);
+ RecursivelyDeleteTriviallyDeadInstructions(&I);
+ return true;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+ SunkAddrs.clear();
+ bool MadeChange = false;
+
+ CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
+ if (ModifiedDT)
+ return true;
+ }
+
+ bool MadeBitReverse = true;
+ while (TLI && MadeBitReverse) {
+ MadeBitReverse = false;
+ for (auto &I : reverse(BB)) {
+ if (makeBitReverse(I, *DL, *TLI)) {
+ MadeBitReverse = MadeChange = true;
+ ModifiedDT = true;
+ break;
+ }
+ }
+ }
+ MadeChange |= dupRetToEnableTailCallOpts(&BB);
+
+ return MadeChange;
+}
+
+// If llvm.dbg.value is far away from the value, ISel may not be able to
+// handle it properly. ISel will drop llvm.dbg.value if it cannot
+// find a node corresponding to the value.
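+//
+// For example (illustrative IR), a dbg.value that has drifted away from its
+// value:
+//   %x = add i32 %a, %b
+//   ...                                   ; many unrelated instructions
+//   call void @llvm.dbg.value(metadata i32 %x, ...)
+// is moved to sit right after the defining instruction:
+//   %x = add i32 %a, %b
+//   call void @llvm.dbg.value(metadata i32 %x, ...)
+//   ...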
+bool CodeGenPrepare::placeDbgValues(Function &F) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ Instruction *PrevNonDbgInst = nullptr;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
+ Instruction *Insn = &*BI++;
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ // Leave dbg.values that refer to an alloca alone. These
+ // intrinsics describe the address of a variable (= the alloca)
+ // being taken. They should not be moved next to the alloca
+ // (and to the beginning of the scope), but rather stay close to
+ // where said address is used.
+ if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
+ PrevNonDbgInst = Insn;
+ continue;
+ }
+
+ Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
+ DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+// If there is a sequence that branches based on comparing a single bit
+// against zero that can be combined into a single instruction, and the
+// target supports folding these into a single instruction, sink the
+// mask and compare into the branch uses. Do this before OptimizeBlock ->
+// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
+// searched for.
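+//
+// For example (illustrative IR), if another block also branches on %cmpVal:
+//   bb1:
+//     br i1 %cmpVal, label %t, label %f
+// the "and; icmp" pair is rematerialized next to that branch so it can be
+// folded there:
+//   bb1:
+//     %andVal1 = and i32 %val, 4
+//     %cmpVal1 = icmp eq i32 %andVal1, 0
+//     br i1 %cmpVal1, label %t, label %f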
+bool CodeGenPrepare::sinkAndCmp(Function &F) {
+ if (!EnableAndCmpSinking)
+ return false;
+ if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
+ return false;
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ // Does this BB end with the following?
+ // %andVal = and %val, #single-bit-set
+ // %cmpVal = icmp %andVal, 0
+ // br i1 %cmpVal, label %dest1, label %dest2
+ BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
+ if (!Brcc || !Brcc->isConditional())
+ continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
+ if (!Cmp || Cmp->getParent() != &BB)
+ continue;
+ ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
+ if (!Zero || !Zero->isZero())
+ continue;
+ Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
+ if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
+ continue;
+ ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
+ if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
+ continue;
+ DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
+
+ // Push the "and; icmp" for any users that are conditional branches.
+ // Since there can only be one branch use per BB, we don't need to keep
+ // track of which BBs we insert into.
+ for (Use &TheUse : Cmp->uses()) {
+ // Find brcc use.
+ BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
+ if (!BrccUser || !BrccUser->isConditional())
+ continue;
+ BasicBlock *UserBB = BrccUser->getParent();
+ if (UserBB == &BB) continue;
+ DEBUG(dbgs() << "found Brcc use\n");
+
+ // Sink the "and; icmp" to use.
+ MadeChange = true;
+ BinaryOperator *NewAnd =
+ BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
+ BrccUser);
+ CmpInst *NewCmp =
+ CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
+ "", BrccUser);
+ TheUse = NewCmp;
+ ++NumAndCmpsMoved;
+ DEBUG(BrccUser->getParent()->dump());
+ }
+ }
+ return MadeChange;
+}
+
+/// \brief Scale down both weights to fit into uint32_t.
+static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+ uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+ uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+ NewTrue = NewTrue / Scale;
+ NewFalse = NewFalse / Scale;
+}
+
+/// \brief Some targets prefer to split a conditional branch like:
+/// \code
+/// %0 = icmp ne i32 %a, 0
+/// %1 = icmp ne i32 %b, 0
+/// %or.cond = or i1 %0, %1
+/// br i1 %or.cond, label %TrueBB, label %FalseBB
+/// \endcode
+/// into multiple branch instructions like:
+/// \code
+/// bb1:
+/// %0 = icmp ne i32 %a, 0
+/// br i1 %0, label %TrueBB, label %bb2
+/// bb2:
+/// %1 = icmp ne i32 %b, 0
+/// br i1 %1, label %TrueBB, label %FalseBB
+/// \endcode
+/// This usually allows instruction selection to do even further optimizations
+/// and combine the compare with the branch instruction. Currently this is
+/// applied for targets which have "cheap" jump instructions.
+///
+/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
+///
+bool CodeGenPrepare::splitBranchCondition(Function &F) {
+ if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
+ return false;
+
+ bool MadeChange = false;
+ for (auto &BB : F) {
+ // Does this BB end with the following?
+ // %cond1 = icmp|fcmp|binary instruction ...
+ // %cond2 = icmp|fcmp|binary instruction ...
+ // %cond.or = or|and i1 %cond1, %cond2
+ // br i1 %cond.or, label %dest1, label %dest2
+ BinaryOperator *LogicOp;
+ BasicBlock *TBB, *FBB;
+ if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+ continue;
+
+ auto *Br1 = cast<BranchInst>(BB.getTerminator());
+ if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+ continue;
+
+ unsigned Opc;
+ Value *Cond1, *Cond2;
+ if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::And;
+ else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
+ Opc = Instruction::Or;
+ else
+ continue;
+
+ if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
+ !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
+ continue;
+
+ DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
+
+ // Create a new BB.
+ auto TmpBB =
+ BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
+ BB.getParent(), BB.getNextNode());
+
+ // Update the original basic block to use the first condition directly in
+ // the branch instruction, and remove the no longer needed and/or instruction.
+ Br1->setCondition(Cond1);
+ LogicOp->eraseFromParent();
+
+ // Depending on the condition we have to replace either the true or the
+ // false successor of the original branch instruction.
+ if (Opc == Instruction::And)
+ Br1->setSuccessor(0, TmpBB);
+ else
+ Br1->setSuccessor(1, TmpBB);
+
+ // Fill in the new basic block.
+ auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
+ if (auto *I = dyn_cast<Instruction>(Cond2)) {
+ I->removeFromParent();
+ I->insertBefore(Br2);
+ }
+
+ // Update PHI nodes in both successors. The original BB needs to be
+ // replaced in one successor's PHI nodes, because the branch now comes from
+ // the newly generated BB (TmpBB). In the other successor we need to add one
+ // incoming edge to the PHI nodes, because both branch instructions now
+ // target the same successor. Depending on the original branch condition
+ // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
+ // we perform the correct update for the PHI nodes.
+ // This doesn't change the successor order of the just created branch
+ // instruction (or any other instruction).
+ if (Opc == Instruction::Or)
+ std::swap(TBB, FBB);
+
+ // Replace the old BB with the new BB.
+ for (auto &I : *TBB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ int i;
+ while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
+ PN->setIncomingBlock(i, TmpBB);
+ }
+
+ // Add another incoming edge from the new BB.
+ for (auto &I : *FBB) {
+ PHINode *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ auto *Val = PN->getIncomingValueForBlock(&BB);
+ PN->addIncoming(Val, TmpBB);
+ }
+
+ // Update the branch weights (from SelectionDAGBuilder::
+ // FindMergedConditions).
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+ // assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
+ uint64_t TrueWeight, FalseWeight;
+ if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = TrueWeight;
+ uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+ NewTrueWeight = TrueWeight;
+ NewFalseWeight = 2 * FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+ }
+ } else {
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original weights are A and B, one choice is to set BB1's
+ // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+ // assumes that
+ // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+ uint64_t TrueWeight, FalseWeight;
+ if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
+ uint64_t NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+ NewTrueWeight = 2 * TrueWeight;
+ NewFalseWeight = FalseWeight;
+ scaleWeights(NewTrueWeight, NewFalseWeight);
+ Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+ .createBranchWeights(NewTrueWeight, NewFalseWeight));
+ }
+ }
+
+ // Note: No point in getting fancy here, since the DT info is never
+ // available to CodeGenPrepare.
+ ModifiedDT = true;
+
+ MadeChange = true;
+
+ DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
+ TmpBB->dump());
+ }
+ return MadeChange;
+}
+
+void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
+ if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
+ I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
new file mode 100644
index 000000000000..a0189a172bfc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -0,0 +1,681 @@
+//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi,
+ const RegisterClassInfo &RCI)
+ : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+ TII(MF.getSubtarget().getInstrInfo()),
+ TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+ Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {}
+
+CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
+}
+
+void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = nullptr;
+
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
+
+ // Clear "do not change" set.
+ KeepRegs.reset();
+
+ bool IsReturnBlock = BB->isReturnBlock();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (const auto &LI : (*SI)->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+
+ // Mark live-out callee-saved registers. In a return block this is
+ // all callee-saved registers. In a non-return block this is any
+ // callee-saved register that is not saved in the prolog.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI->getPristineRegs(MF);
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
+ unsigned Reg = *AI;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BBSize;
+ DefIndices[Reg] = ~0u;
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::FinishBlock() {
+ RegRefs.clear();
+ KeepRegs.reset();
+}
+
+void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) {
+ // Kill instructions can define registers but are really nops, and there might
+ // be a real definition earlier that needs to be paired with uses dominated by
+ // this kill.
+
+ // FIXME: It may be possible to remove the isKill() restriction once PR18663
+ // has been properly fixed. There can be value in processing kills as seen in
+ // the AggressiveAntiDepBreaker class.
+ if (MI.isDebugValue() || MI.isKill())
+ return;
+ assert(Count < InsertPosIndex && "Instruction index out of expected range!");
+
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Move the def index to the end of the previous region, to reflect
+ // that the def could theoretically have been scheduled at the end.
+ DefIndices[Reg] = InsertPosIndex;
+ }
+ }
+
+ PrescanInstruction(MI);
+ ScanInstruction(MI, Count);
+}
+
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static const SDep *CriticalPathStep(const SUnit *SU) {
+ const SDep *Next = nullptr;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ const SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
+ // It's not safe to change register allocation for source operands of
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are being
+ // conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special =
+ MI.isCall() || MI.hasExtraSrcRegAllocReq() || TII->isPredicated(MI);
+
+ // Scan the register operands for this instruction and update
+ // Classes and RegRefs.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ const TargetRegisterClass *NewRC = nullptr;
+
+ if (i < MI.getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ // Now check for aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) {
+ // If an alias of the reg is used during the live range, give up.
+ // Note that this allows us to skip checking if AntiDepReg
+ // overlaps with any of the aliases, among other things.
+ unsigned AliasReg = *AI;
+ if (Classes[AliasReg]) {
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+
+ // If we're still willing to consider this register, note the reference.
+ if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
+ // it or any of its sub or super regs. We need to use KeepRegs to mark the
+ // reg because not all uses of the same reg within an instruction are
+ // necessarily tagged as tied.
+ // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
+ // def register but not the second (see PR20020 for details).
+ // FIXME: can this check be relaxed to account for undef uses
+ // of a register? In the above 'xor' example, the uses of %eax are undef, so
+ // earlier instructions could still replace %eax even though the 'xor'
+ // itself can't be changed.
+ if (MI.isRegTiedToUseOperand(i) &&
+ Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs) {
+ KeepRegs.set(*SubRegs);
+ }
+ for (MCSuperRegIterator SuperRegs(Reg, TRI);
+ SuperRegs.isValid(); ++SuperRegs) {
+ KeepRegs.set(*SuperRegs);
+ }
+ }
+
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
+ }
+ }
+ }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
+ // Update liveness.
+ // Proceeding upwards, registers that are defined but not used in this
+ // instruction are now dead.
+ assert(!MI.isKill() && "Attempting to scan a kill instruction");
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to two
+ // address updates.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+
+ if (MO.isRegMask())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (MO.clobbersPhysReg(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = nullptr;
+ RegRefs.erase(i);
+ }
+
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+
+ // Ignore two-addr defs.
+ if (MI.isRegTiedToUseOperand(i))
+ continue;
+
+ // If we've already marked this reg as unchangeable, don't remove
+ // it or any of its subregs from KeepRegs.
+ bool Keep = KeepRegs.test(Reg);
+
+ // For the reg itself and all subregs: update the def to current;
+ // reset the kill state, any restrictions, and references.
+ for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
+ unsigned SubregReg = *SRI;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ Classes[SubregReg] = nullptr;
+ RegRefs.erase(SubregReg);
+ if (!Keep)
+ KeepRegs.reset(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
+ }
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isUse()) continue;
+
+ const TargetRegisterClass *NewRC = nullptr;
+ if (i < MI.getDesc().getNumOperands())
+ NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF);
+
+ // For now, only allow the register to be changed if its register
+ // class is consistent across all uses.
+ if (!Classes[Reg] && NewRC)
+ Classes[Reg] = NewRC;
+ else if (!NewRC || Classes[Reg] != NewRC)
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ RegRefs.insert(std::make_pair(Reg, &MO));
+
+ // If it wasn't previously live but now it is, this is a kill.
+ // Repeat for all aliases.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (KillIndices[AliasReg] == ~0u) {
+ KillIndices[AliasReg] = Count;
+ DefIndices[AliasReg] = ~0u;
+ }
+ }
+ }
+}
+
+// Check all machine operands that reference the antidependent register and must
+// be replaced by NewReg. Return true if any of their parent instructions may
+// clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg.
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned CriticalAntiDepBreaker::
+findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &Forbid)
+{
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
+ // Don't replace a register with itself.
+ if (NewReg == AntiDepReg) continue;
+ // Don't replace a register with one that was recently used to repair
+ // an anti-dependence with this AntiDepReg, because that would
+ // re-introduce that anti-dependence.
+ if (NewReg == LastNewReg) continue;
+ // If any instructions that define AntiDepReg also define the NewReg, it's
+ // not suitable. For example, instructions with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+ // If NewReg is dead and NewReg's most recent def is not before
+ // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+ assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+ && "Kill and Def maps aren't consistent for AntiDepReg!");
+ assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+ && "Kill and Def maps aren't consistent for NewReg!");
+ if (KillIndices[NewReg] != ~0u ||
+ Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+ KillIndices[AntiDepReg] > DefIndices[NewReg])
+ continue;
+ // If NewReg overlaps any of the forbidden registers, we can't use it.
+ bool Forbidden = false;
+ for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(),
+ ite = Forbid.end(); it != ite; ++it)
+ if (TRI->regsOverlap(NewReg, *it)) {
+ Forbidden = true;
+ break;
+ }
+ if (Forbidden) continue;
+ return NewReg;
+ }
+
+ // No registers are free and available!
+ return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) {
+ // The code below assumes that there is at least one instruction,
+ // so just duck out immediately if the block is empty.
+ if (SUnits.empty()) return 0;
+
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
+ // Find the node at the bottom of the critical path.
+ const SUnit *Max = nullptr;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
+ if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
+ Max = SU;
+ }
+
+#ifndef NDEBUG
+ {
+ DEBUG(dbgs() << "Critical path has total latency "
+ << (Max->getDepth() + Max->Latency) << "\n");
+ DEBUG(dbgs() << "Available regs:");
+ for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] == ~0u)
+ DEBUG(dbgs() << " " << TRI->getName(Reg));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+#endif
+
+ // Track progress along the critical path through the SUnit graph as we walk
+ // the instructions.
+ const SUnit *CriticalPathSU = Max;
+ MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
+
+ // Consider this pattern:
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // A = ...
+ // ... = A
+ // There are three anti-dependencies here, and without special care,
+ // we'd break all of them using the same register:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // B = ...
+ // ... = B
+ // because at each anti-dependence, B is the first register that
+ // isn't A which is free. This re-introduces anti-dependencies
+ // at all but one of the original anti-dependencies that we were
+ // trying to break. To avoid this, keep track of the most recent
+ // register that each register was replaced with, and avoid
+ // using it to repair an anti-dependence on the same register.
+ // This lets us produce this:
+ // A = ...
+ // ... = A
+ // B = ...
+ // ... = B
+ // C = ...
+ // ... = C
+ // B = ...
+ // ... = B
+ // This still has an anti-dependence on B, but at least it isn't on the
+ // original critical path.
+ //
+ // TODO: If we tracked more than one register here, we could potentially
+ // fix that remaining critical edge too. This is a little more involved,
+ // because unlike the most recent register, less recent registers should
+ // still be considered, though only if no other registers are available.
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
+
+ // Attempt to break anti-dependence edges on the critical path. Walk the
+ // instructions from the bottom up, tracking information about liveness
+ // as we go to help determine which registers are available.
+ unsigned Broken = 0;
+ unsigned Count = InsertPosIndex - 1;
+ for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
+ MachineInstr &MI = *--I;
+ // Kill instructions can define registers but are really nops, and there
+ // might be a real definition earlier that needs to be paired with uses
+ // dominated by this kill.
+
+ // FIXME: It may be possible to remove the isKill() restriction once PR18663
+ // has been properly fixed. There can be value in processing kills as seen
+ // in the AggressiveAntiDepBreaker class.
+ if (MI.isDebugValue() || MI.isKill())
+ continue;
+
+ // Check if this instruction has a dependence on the critical path that
+ // is an anti-dependence that we may be able to break. If it is, set
+ // AntiDepReg to the non-zero register associated with the anti-dependence.
+ //
+ // We limit our attention to the critical path as a heuristic to avoid
+ // breaking anti-dependence edges that aren't going to significantly
+ // impact the overall schedule. There are a limited number of registers
+ // and we want to save them for the important edges.
+ //
+ // TODO: Instructions with multiple defs could have multiple
+ // anti-dependencies. The current code here only knows how to break one
+ // edge per instruction. Note that we'd have to be able to break all of
+ // the anti-dependencies in an instruction in order to be effective.
+ unsigned AntiDepReg = 0;
+ if (&MI == CriticalPathMI) {
+ if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
+ const SUnit *NextSU = Edge->getSUnit();
+
+ // Only consider anti-dependence edges.
+ if (Edge->getKind() == SDep::Anti) {
+ AntiDepReg = Edge->getReg();
+ assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+ if (!MRI.isAllocatable(AntiDepReg))
+ // Don't break anti-dependencies on non-allocatable registers.
+ AntiDepReg = 0;
+ else if (KeepRegs.test(AntiDepReg))
+ // Don't break anti-dependencies if a use down below requires
+ // this exact register.
+ AntiDepReg = 0;
+ else {
+ // If the SUnit has other dependencies on the SUnit that it
+ // anti-depends on, don't bother breaking the anti-dependency
+ // since those edges would prevent such units from being
+ // scheduled past each other regardless.
+ //
+ // Also, if there are dependencies on other SUnits with the
+ // same register as the anti-dependency, don't attempt to
+ // break it.
+ for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+ PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+ if (P->getSUnit() == NextSU ?
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+ (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ }
+ }
+ CriticalPathSU = NextSU;
+ CriticalPathMI = CriticalPathSU->getInstr();
+ } else {
+ // We've reached the end of the critical path.
+ CriticalPathSU = nullptr;
+ CriticalPathMI = nullptr;
+ }
+ }
+
+ PrescanInstruction(MI);
+
+ SmallVector<unsigned, 2> ForbidRegs;
+
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI))
+ // If this instruction's defs have special allocation requirement, don't
+ // break this anti-dependency.
+ AntiDepReg = 0;
+ else if (AntiDepReg) {
+ // If this instruction has a use of AntiDepReg, breaking it
+ // is invalid. If the instruction defines other registers,
+ // save a list of them so that we don't pick a new register
+ // that overlaps any of them.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+ AntiDepReg = 0;
+ break;
+ }
+ if (MO.isDef() && Reg != AntiDepReg)
+ ForbidRegs.push_back(Reg);
+ }
+ }
+
+ // Determine AntiDepReg's register class, if it is live and is
+ // consistently used within a single class.
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg]
+ : nullptr;
+ assert((AntiDepReg == 0 || RC != nullptr) &&
+ "Register should be live if it's causing an anti-dependence!");
+ if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+ AntiDepReg = 0;
+
+ // Look for a suitable register to use to break the anti-dependence.
+ //
+ // TODO: Instead of picking the first free register, consider which might
+ // be the best.
+ if (AntiDepReg != 0) {
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
+ LastNewReg[AntiDepReg],
+ RC, ForbidRegs)) {
+ DEBUG(dbgs() << "Breaking anti-dependence edge on "
+ << TRI->getName(AntiDepReg)
+ << " with " << RegRefs.count(AntiDepReg) << " references"
+ << " using " << TRI->getName(NewReg) << "!\n");
+
+ // Update the references to the old register to refer to the new
+ // register.
+ for (std::multimap<unsigned, MachineOperand *>::iterator
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+ Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (DbgValueVector::iterator DVI = DbgValues.begin(),
+ DVE = DbgValues.end(); DVI != DVE; ++DVI)
+ if (DVI->second == Q->second->getParent())
+ UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
+ }
+
+ // We just went back in time and modified history; the
+ // liveness information for the anti-dependence reg is now
+ // inconsistent. Set the state as if it were dead.
+ Classes[NewReg] = Classes[AntiDepReg];
+ DefIndices[NewReg] = DefIndices[AntiDepReg];
+ KillIndices[NewReg] = KillIndices[AntiDepReg];
+ assert(((KillIndices[NewReg] == ~0u) !=
+ (DefIndices[NewReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for NewReg!");
+
+ Classes[AntiDepReg] = nullptr;
+ DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+ KillIndices[AntiDepReg] = ~0u;
+ assert(((KillIndices[AntiDepReg] == ~0u) !=
+ (DefIndices[AntiDepReg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for AntiDepReg!");
+
+ RegRefs.erase(AntiDepReg);
+ LastNewReg[AntiDepReg] = NewReg;
+ ++Broken;
+ }
+ }
+
+ ScanInstruction(MI, Count);
+ }
+
+ return Broken;
+}
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
new file mode 100644
index 000000000000..678779fa1a26
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -0,0 +1,106 @@
+//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CriticalAntiDepBreaker class, which
+// implements register anti-dependence breaking along a block's
+// critical path during post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+namespace llvm {
+class RegisterClassInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class MachineFunction;
+
+class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
+ MachineFunction& MF;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
+
+ /// The set of allocatable registers.
+ /// We'll be ignoring anti-dependencies on non-allocatable registers,
+ /// because they may not be safe to break.
+ const BitVector AllocatableSet;
+
+ /// For live regs that are only used in one register class in a
+ /// live range, the register class. If the register is not live, the
+ /// corresponding value is null. If the register is live but used in
+ /// multiple register classes, the corresponding value is -1 casted to a
+ /// pointer.
+ std::vector<const TargetRegisterClass*> Classes;
+
+ /// Map registers to all their references within a live range.
+ std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
+
+ /// The index of the most recent kill (proceeding bottom-up),
+ /// or ~0u if the register is not live.
+ std::vector<unsigned> KillIndices;
+
+ /// The index of the most recent complete def (proceeding
+ /// bottom up), or ~0u if the register is live.
+ std::vector<unsigned> DefIndices;
+
+ /// A set of registers which are live and cannot be changed to
+ /// break anti-dependencies.
+ BitVector KeepRegs;
+
+ public:
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+ ~CriticalAntiDepBreaker() override;
+
+ /// Initialize anti-dep breaking for a new basic block.
+ void StartBlock(MachineBasicBlock *BB) override;
+
+ /// Identify anti-dependencies along the critical path
+ /// of the ScheduleDAG and break them by renaming registers.
+ unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned InsertPosIndex,
+ DbgValueVector &DbgValues) override;
+
+ /// Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ void Observe(MachineInstr &MI, unsigned Count,
+ unsigned InsertPosIndex) override;
+
+ /// Finish anti-dep breaking for a basic block.
+ void FinishBlock() override;
+
+ private:
+ void PrescanInstruction(MachineInstr &MI);
+ void ScanInstruction(MachineInstr &MI, unsigned Count);
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned AntiDepReg,
+ unsigned LastNewReg,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &Forbid);
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 000000000000..2386af9e6877
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,325 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
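+//
+// A typical use of this class (sketch only; everything except the
+// DFAPacketizer API below is illustrative) packs instructions greedily:
+//
+//   DFAPacketizer *RT = TII->CreateTargetScheduleState(MF.getSubtarget());
+//   for (MachineInstr &MI : InstrRange) {
+//     if (!RT->canReserveResources(MI))
+//       break;                        // the current packet is full
+//     RT->reserveResources(MI);       // add MI and advance the DFA state
+//   }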
+
+#define DEBUG_TYPE "packets"
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+// --------------------------------------------------------------------
+// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
+
+namespace {
+ DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
+ return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
+ }
+
+ /// Return the DFAInput for an instruction class input vector.
+ /// This function is used in both DFAPacketizer.cpp and in
+ /// DFAPacketizerEmitter.cpp.
+ DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
+ DFAInput InsnInput = 0;
+ assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
+ for (auto U : InsnClass)
+ InsnInput = addDFAFuncUnits(InsnInput, U);
+ return InsnInput;
+ }
+}
+// --------------------------------------------------------------------
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
+ const DFAStateInput (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {
+ // Make sure DFA types are large enough for the number of terms & resources.
+ static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
+ (8 * sizeof(DFAInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+ static_assert(
+ (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
+}
+
+
+// Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
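+// For example, with purely illustrative values:
+//   DFAStateEntryTable = { 0, 2, 5, ... }
+//     => state 0 owns DFAStateInputTable[0..1], state 1 owns [2..4], ...
+//   DFAStateInputTable = { {In0, S1}, {In1, S2},             // from state 0
+//                          {In0, S3}, {In2, S1}, {In3, S4},  // from state 1
+//                          ... }
+//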
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
+
+
+// Return the DFAInput for an instruction class.
+DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
+ // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
+ DFAInput InsnInput = 0;
+ unsigned i = 0;
+ (void)i;
+ for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
+ *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
+ InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
+ assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
+ }
+ return InsnInput;
+}
+
+
+// Return the DFAInput for an instruction class input vector.
+DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
+ return getDFAInsnInput(InsnClass);
+}
+
+
+// Check if the resources occupied by an MCInstrDesc are available in the
+// current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ DFAInput InsnInput = getInsnInput(InsnClass);
+ UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
+ ReadTable(CurrentState);
+ return CachedTable.count(StateTrans) != 0;
+}
+
+
+// Reserve the resources occupied by an MCInstrDesc and change the current
+// state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ DFAInput InsnInput = getInsnInput(InsnClass);
+ UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// Check if the resources occupied by a machine instruction are available
+// in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) {
+ const llvm::MCInstrDesc &MID = MI.getDesc();
+ return canReserveResources(&MID);
+}
+
+
+// Reserve the resources occupied by a machine instruction and change the
+// current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) {
+ const llvm::MCInstrDesc &MID = MI.getDesc();
+ reserveResources(&MID);
+}
+
+
+namespace llvm {
+// This class extends ScheduleDAGInstrs and overrides the schedule method
+// to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+private:
+ AliasAnalysis *AA;
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ AliasAnalysis *AA);
+ // Actual scheduling work.
+ void schedule() override;
+
+ /// DefaultVLIWScheduler takes ownership of the Mutation object.
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+protected:
+ void postprocessDAG();
+};
+}
+
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
+ MachineLoopInfo &MLI,
+ AliasAnalysis *AA)
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
+ CanHandleTerminators = true;
+}
+
+
+/// Apply each ScheduleDAGMutation step in order.
+void DefaultVLIWScheduler::postprocessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(AA);
+ postprocessDAG();
+}
+
+
+VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
+ MachineLoopInfo &mli, AliasAnalysis *aa)
+ : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
+ ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
+ VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
+}
+
+
+VLIWPacketizerList::~VLIWPacketizerList() {
+ if (VLIWScheduler)
+ delete VLIWScheduler;
+ if (ResourceTracker)
+ delete ResourceTracker;
+}
+
+
+// End the current packet, bundle packet instructions and reset DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator MI) {
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr &MIFirst = *CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+ DEBUG(dbgs() << "End packet\n");
+}
+
+
+// Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->startBlock(MBB);
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
+ std::distance(BeginItr, EndItr));
+ VLIWScheduler->schedule();
+
+ DEBUG({
+ dbgs() << "Scheduling DAG of the packetize region\n";
+ for (SUnit &SU : VLIWScheduler->SUnits)
+ SU.dumpAll(VLIWScheduler);
+ });
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (SUnit &SU : VLIWScheduler->SUnits)
+ MIToSUnit[SU.getInstr()] = &SU;
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ MachineInstr &MI = *BeginItr;
+ initPacketizerState();
+
+ // End the current packet if needed.
+ if (isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[&MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask DFA if machine resource is available for MI.
+ DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
+
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ DEBUG({
+ if (ResourceAvail)
+ dbgs() << " Resources are available for adding MI to packet\n";
+ else
+ dbgs() << " Resources NOT available\n";
+ });
+ if (ResourceAvail && shouldAddToPacket(MI)) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (auto MJ : CurrentPacketMIs) {
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ DEBUG(dbgs() << " Checking against MJ " << *MJ);
+        // Is it legal to packetize SUI and SUJ together?
+ if (!isLegalToPacketizeTogether(SUI, SUJ)) {
+ DEBUG(dbgs() << " Not legal to add MI, try to prune\n");
+ // Allow packetization if dependency can be pruned.
+ if (!isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ DEBUG(dbgs() << " Could not prune dependencies for adding MI\n");
+ endPacket(MBB, MI);
+ break;
+ }
+ DEBUG(dbgs() << " Pruned dependence for adding MI\n");
+ }
+ }
+ } else {
+ DEBUG(if (ResourceAvail)
+ dbgs() << "Resources are available, but instruction should not be "
+ "added to packet\n " << MI);
+      // End the packet if resources are not available, or if the instruction
+      // should not be added to the current packet.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
+ BeginItr = addToPacket(MI);
+ } // For all instructions in the packetization range.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+ VLIWScheduler->exitRegion();
+ VLIWScheduler->finishBlock();
+}
+
+
+// Add a DAG mutation object to the ordered list.
+void VLIWPacketizerList::addMutation(
+ std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ VLIWScheduler->addMutation(std::move(Mutation));
+}
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 000000000000..0b8dc7a86ada
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,181 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+//
+//===----------------------------------------------------------------------===//
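+//
+// Editorial sketch (not part of the original source; opcodes are
+// illustrative): a chain such as
+//
+//   %vreg1<def> = ADDri %vreg0, 1    ; only used by the next instruction
+//   %vreg2<def> = ADDri %vreg1, 2    ; never used
+//
+// is cleaned up by the bottom-up scan: deleting the second instruction
+// leaves %vreg1 without non-debug uses, so the first one is deleted when
+// the scan reaches it.
+//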
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "codegen-dce"
+
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+ class DeadMachineInstructionElim : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ BitVector LivePhysRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ bool isDead(const MachineInstr *MI) const;
+ };
+}
+char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
+
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false)
+
+bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking inline asm without side effects and no defs can still
+ // be deleted. But there is so much bad inline asm code out there, we should
+ // let them be.
+ if (MI->isInlineAsm())
+ return false;
+
+ // Don't delete frame allocation labels.
+ if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+ return false;
+
+ // Don't delete instructions with side effects.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI())
+ return false;
+
+ // Examine each operand.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
+ return false;
+ } else {
+ if (!MRI->use_nodbg_empty(Reg))
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
+ }
+ }
+
+ // If there are no defs with uses, the instruction is dead.
+ return true;
+}
+
+bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ bool AnyChanges = false;
+ MRI = &MF.getRegInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Loop over all instructions in all blocks, from bottom to top, so that it's
+ // more likely that chains of dependent but ultimately dead instructions will
+ // be cleaned up.
+ for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = MRI->getReservedRegs();
+
+    // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+ E = MBB.succ_end(); S != E; S++)
+ for (const auto &LI : (*S)->liveins())
+ LivePhysRegs.set(LI.PhysReg);
+
+ // Now scan the instructions and delete dead ones, tracking physreg
+ // liveness as we go.
+ for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
+ MIE = MBB.rend(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // If the instruction is dead, delete it!
+ if (isDead(MI)) {
+ DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ // It is possible that some DBG_VALUE instructions refer to this
+ // instruction. They get marked as undef and will be deleted
+ // in the live debug variable analysis.
+ MI->eraseFromParentAndMarkDBGValuesForRemoval();
+ AnyChanges = true;
+ ++NumDeletes;
+ MIE = MBB.rend();
+ // MII is now pointing to the next instruction to process,
+ // so don't increment it.
+ continue;
+ }
+
+ // Record the physreg defs.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Check the subreg set, not the alias set, because a def
+ // of a super-register may still be partially live after
+ // this def.
+ for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true);
+ SR.isValid(); ++SR)
+ LivePhysRegs.reset(*SR);
+ }
+ } else if (MO.isRegMask()) {
+ // Register mask of preserved registers. All clobbers are dead.
+ LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
+ }
+ }
+ // Record the physreg uses, after the defs, in case a physreg is
+ // both defined and used in the same instruction.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ LivePhysRegs.set(*AI);
+ }
+ }
+ }
+
+ // We didn't delete the current instruction, so increment MII to
+ // the next one.
+ ++MII;
+ }
+ }
+
+ LivePhysRegs.clear();
+ return AnyChanges;
+}
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
new file mode 100644
index 000000000000..1d9e79c055e0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -0,0 +1,602 @@
+//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Analysis that tracks defined/used subregister lanes across COPY instructions
+/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
+/// INSERT_SUBREG, EXTRACT_SUBREG).
+/// The information is used to detect dead definitions and the usage of
+/// (completely) undefined values and mark the operands as such.
+/// This pass is necessary because the dead/undef status is not obvious anymore
+/// when subregisters are involved.
+///
+/// Example:
+///    %vreg0 = some definition
+///    %vreg1 = IMPLICIT_DEF
+///    %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1
+///    %vreg3 = EXTRACT_SUBREG %vreg2, sub1
+///           = use %vreg3
+/// The %vreg0 definition is dead and %vreg3 contains an undefined value.
+//
+//===----------------------------------------------------------------------===//
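+//
+// Editorial sketch (not part of the original source): for the example above,
+// assuming sub0 maps to lane mask 0x1 and sub1 to 0x2, the analysis roughly
+// computes for %vreg2
+//
+//   DefinedLanes = 0x1   (only sub0 is written by a real definition)
+//   UsedLanes    = 0x2   (only sub1 is read, via the EXTRACT_SUBREG)
+//
+// The intersection is empty, so the %vreg0 def is marked dead and the read
+// of %vreg3 is marked undef.
+//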
+
+#include <deque>
+#include <vector>
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "detect-dead-lanes"
+
+namespace {
+
+/// Contains a bitmask of which lanes of a given virtual register are
+/// defined and which ones are actually used.
+struct VRegInfo {
+ LaneBitmask UsedLanes;
+ LaneBitmask DefinedLanes;
+};
+
+class DetectDeadLanes : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ DetectDeadLanes() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override { return "Detect Dead Lanes"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ /// Add used lane bits on the register used by operand \p MO. This translates
+ /// the bitmask based on the operands subregister, and puts the register into
+ /// the worklist if any new bits were added.
+ void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
+
+ /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
+ /// COPY-like instruction determine the lanes used on the use operands
+ /// and call addUsedLanesOnOperand() for them.
+ void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes);
+
+  /// Given a use register operand \p Use and a mask of defined lanes, check
+ /// if the operand belongs to a lowersToCopies() instruction, transfer the
+ /// mask to the def and put the instruction into the worklist.
+ void transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes);
+
+ /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
+  /// of a COPY-like instruction, determine which lanes are defined at the output
+ /// operand \p Def.
+ LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
+ LaneBitmask DefinedLanes) const;
+
+ /// Given a mask \p UsedLanes used from the output of instruction \p MI
+ /// determine which lanes are used from operand \p MO of this instruction.
+ LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
+ const MachineOperand &MO) const;
+
+ bool runOnce(MachineFunction &MF);
+
+ LaneBitmask determineInitialDefinedLanes(unsigned Reg);
+ LaneBitmask determineInitialUsedLanes(unsigned Reg);
+
+ bool isUndefRegAtInput(const MachineOperand &MO,
+ const VRegInfo &RegInfo) const;
+
+ bool isUndefInput(const MachineOperand &MO, bool *CrossCopy) const;
+
+ const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+
+ void PutInWorklist(unsigned RegIdx) {
+ if (WorklistMembers.test(RegIdx))
+ return;
+ WorklistMembers.set(RegIdx);
+ Worklist.push_back(RegIdx);
+ }
+
+ VRegInfo *VRegInfos;
+ /// Worklist containing virtreg indexes.
+ std::deque<unsigned> Worklist;
+ BitVector WorklistMembers;
+ /// This bitvector is set for each vreg index where the vreg is defined
+ /// by an instruction where lowersToCopies()==true.
+ BitVector DefinedByCopy;
+};
+
+} // end anonymous namespace
+
+char DetectDeadLanes::ID = 0;
+char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
+
+INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes",
+ false, false)
+
+/// Returns true if \p MI will get lowered to a series of COPY instructions.
+/// We call this a COPY-like instruction.
+static bool lowersToCopies(const MachineInstr &MI) {
+ // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(),
+ // isExtractSubRegLike(), isInsertSubregLike() in the future even though they
+ // are not lowered to a COPY.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::EXTRACT_SUBREG:
+ return true;
+ }
+ return false;
+}
+
+static bool isCrossCopy(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ const TargetRegisterClass *DstRC,
+ const MachineOperand &MO) {
+ assert(lowersToCopies(MI));
+ unsigned SrcReg = MO.getReg();
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ if (DstRC == SrcRC)
+ return false;
+
+ unsigned SrcSubIdx = MO.getSubReg();
+
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned DstSubIdx = 0;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::INSERT_SUBREG:
+ if (MI.getOperandNo(&MO) == 2)
+ DstSubIdx = MI.getOperand(3).getImm();
+ break;
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned OpNum = MI.getOperandNo(&MO);
+ DstSubIdx = MI.getOperand(OpNum+1).getImm();
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubReg = MI.getOperand(2).getImm();
+ SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx);
+ }
+ }
+
+ unsigned PreA, PreB; // Unused.
+ if (SrcSubIdx && DstSubIdx)
+ return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA,
+ PreB);
+ if (SrcSubIdx)
+ return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx);
+ if (DstSubIdx)
+ return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx);
+ return !TRI.getCommonSubClass(SrcRC, DstRC);
+}
+
+void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
+ LaneBitmask UsedLanes) {
+ if (!MO.readsReg())
+ return;
+ unsigned MOReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(MOReg))
+ return;
+
+ unsigned MOSubReg = MO.getSubReg();
+ if (MOSubReg != 0)
+ UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
+ UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
+
+ unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg);
+ VRegInfo &MORegInfo = VRegInfos[MORegIdx];
+ LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
+ // Any change at all?
+ if ((UsedLanes & ~PrevUsedLanes) == 0)
+ return;
+
+ // Set UsedLanes and remember instruction for further propagation.
+ MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes;
+ if (DefinedByCopy.test(MORegIdx))
+ PutInWorklist(MORegIdx);
+}
+
+void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
+ LaneBitmask UsedLanes) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
+ addUsedLanesOnOperand(MO, UsedOnMO);
+ }
+}
+
+LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
+ LaneBitmask UsedLanes,
+ const MachineOperand &MO) const {
+ unsigned OpNum = MI.getOperandNo(&MO);
+ assert(lowersToCopies(MI) && DefinedByCopy[
+ TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ return UsedLanes;
+ case TargetOpcode::REG_SEQUENCE: {
+ assert(OpNum % 2 == 1);
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ return TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ LaneBitmask MO2UsedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ if (OpNum == 2)
+ return MO2UsedLanes;
+
+ const MachineOperand &Def = MI.getOperand(0);
+ unsigned DefReg = Def.getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LaneBitmask MO1UsedLanes;
+ if (RC->CoveredBySubRegs)
+ MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx);
+ else
+ MO1UsedLanes = RC->LaneMask;
+
+ assert(OpNum == 1);
+ return MO1UsedLanes;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ assert(OpNum == 1);
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ return TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes);
+ }
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+}
+
+void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes) {
+ if (!Use.readsReg())
+ return;
+ // Check whether the operand writes a vreg and is part of a COPY-like
+ // instruction.
+ const MachineInstr &MI = *Use.getParent();
+ if (MI.getDesc().getNumDefs() != 1)
+ return;
+ // FIXME: PATCHPOINT instructions announce a Def that does not always exist,
+ // they really need to be modeled differently!
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ return;
+ const MachineOperand &Def = *MI.defs().begin();
+ unsigned DefReg = Def.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ return;
+ unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ if (!DefinedByCopy.test(DefRegIdx))
+ return;
+
+ unsigned OpNum = MI.getOperandNo(&Use);
+ DefinedLanes =
+ TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
+ DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
+
+ VRegInfo &RegInfo = VRegInfos[DefRegIdx];
+ LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
+ // Any change at all?
+ if ((DefinedLanes & ~PrevDefinedLanes) == 0)
+ return;
+
+ RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
+ PutInWorklist(DefRegIdx);
+}
+
+LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
+ unsigned OpNum, LaneBitmask DefinedLanes) const {
+ const MachineInstr &MI = *Def.getParent();
+ // Translate DefinedLanes if necessary.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::REG_SEQUENCE: {
+ unsigned SubIdx = MI.getOperand(OpNum + 1).getImm();
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ break;
+ }
+ case TargetOpcode::INSERT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(3).getImm();
+ if (OpNum == 2) {
+ DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx);
+ } else {
+ assert(OpNum == 1 && "INSERT_SUBREG must have two operands");
+ // Ignore lanes defined by operand 2.
+ DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx);
+ }
+ break;
+ }
+ case TargetOpcode::EXTRACT_SUBREG: {
+ unsigned SubIdx = MI.getOperand(2).getImm();
+ assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only");
+ DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes);
+ break;
+ }
+ case TargetOpcode::COPY:
+ case TargetOpcode::PHI:
+ break;
+ default:
+ llvm_unreachable("function must be called with COPY-like instruction");
+ }
+
+ assert(Def.getSubReg() == 0 &&
+ "Should not have subregister defs in machine SSA phase");
+ DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg());
+ return DefinedLanes;
+}
+
+LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
+ // Live-In or unused registers have no definition but are considered fully
+ // defined.
+ if (!MRI->hasOneDef(Reg))
+ return ~0u;
+
+ const MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &DefMI = *Def.getParent();
+ if (lowersToCopies(DefMI)) {
+    // Start optimistically with no used or defined lanes for copy
+ // instructions. The following dataflow analysis will add more bits.
+ unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ DefinedByCopy.set(RegIdx);
+ PutInWorklist(RegIdx);
+
+ if (Def.isDead())
+ return 0;
+
+ // COPY/PHI can copy across unrelated register classes (example: float/int)
+ // with incompatible subregister structure. Do not include these in the
+ // dataflow analysis since we cannot transfer lanemasks in a meaningful way.
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+
+ // Determine initially DefinedLanes.
+ LaneBitmask DefinedLanes = 0;
+ for (const MachineOperand &MO : DefMI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+
+ LaneBitmask MODefinedLanes;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ MODefinedLanes = ~0u;
+ } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
+ MODefinedLanes = ~0u;
+ } else {
+ assert(TargetRegisterInfo::isVirtualRegister(MOReg));
+ if (MRI->hasOneDef(MOReg)) {
+ const MachineOperand &MODef = *MRI->def_begin(MOReg);
+ const MachineInstr &MODefMI = *MODef.getParent();
+ // Bits from copy-like operations will be added later.
+ if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef())
+ continue;
+ }
+ unsigned MOSubReg = MO.getSubReg();
+ MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg);
+ MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(
+ MOSubReg, MODefinedLanes);
+ }
+
+ unsigned OpNum = DefMI.getOperandNo(&MO);
+ DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
+ }
+ return DefinedLanes;
+ }
+ if (DefMI.isImplicitDef() || Def.isDead())
+ return 0;
+
+ assert(Def.getSubReg() == 0 &&
+ "Should not have subregister defs in machine SSA phase");
+ return MRI->getMaxLaneMaskForVReg(Reg);
+}
+
+LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
+ LaneBitmask UsedLanes = 0;
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ if (!MO.readsReg())
+ continue;
+
+ const MachineInstr &UseMI = *MO.getParent();
+ if (UseMI.isKill())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (lowersToCopies(UseMI)) {
+ assert(UseMI.getDesc().getNumDefs() == 1);
+ const MachineOperand &Def = *UseMI.defs().begin();
+ unsigned DefReg = Def.getReg();
+ // The used lanes of COPY-like instruction operands are determined by the
+ // following dataflow analysis.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ // But ignore copies across incompatible register classes.
+ bool CrossCopy = false;
+ if (lowersToCopies(UseMI)) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
+ if (CrossCopy)
+ DEBUG(dbgs() << "Copy accross incompatible classes: " << UseMI);
+ }
+
+ if (!CrossCopy)
+ continue;
+ }
+ }
+
+ // Shortcut: All lanes are used.
+ if (SubReg == 0)
+ return MRI->getMaxLaneMaskForVReg(Reg);
+
+ UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg);
+ }
+ return UsedLanes;
+}
+
+bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO,
+ const VRegInfo &RegInfo) const {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0;
+}
+
+bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
+ bool *CrossCopy) const {
+ if (!MO.isUse())
+ return false;
+ const MachineInstr &MI = *MO.getParent();
+ if (!lowersToCopies(MI))
+ return false;
+ const MachineOperand &Def = MI.getOperand(0);
+ unsigned DefReg = Def.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ return false;
+ unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ if (!DefinedByCopy.test(DefRegIdx))
+ return false;
+
+ const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx];
+ LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
+ if (UsedLanes != 0)
+ return false;
+
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg)) {
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
+ *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
+ }
+ return true;
+}
+
+bool DetectDeadLanes::runOnce(MachineFunction &MF) {
+ // First pass: Populate defs/uses of vregs with initial values
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+
+ // Determine used/defined lanes and add copy instructions to worklist.
+ VRegInfo &Info = VRegInfos[RegIdx];
+ Info.DefinedLanes = determineInitialDefinedLanes(Reg);
+ Info.UsedLanes = determineInitialUsedLanes(Reg);
+ }
+
+ // Iterate as long as defined lanes/used lanes keep changing.
+ while (!Worklist.empty()) {
+ unsigned RegIdx = Worklist.front();
+ Worklist.pop_front();
+ WorklistMembers.reset(RegIdx);
+ VRegInfo &Info = VRegInfos[RegIdx];
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+
+ // Transfer UsedLanes to operands of DefMI (backwards dataflow).
+ MachineOperand &Def = *MRI->def_begin(Reg);
+ const MachineInstr &MI = *Def.getParent();
+ transferUsedLanesStep(MI, Info.UsedLanes);
+ // Transfer DefinedLanes to users of Reg (forward dataflow).
+ for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg))
+ transferDefinedLanesStep(MO, Info.DefinedLanes);
+ }
+
+ DEBUG(
+ dbgs() << "Defined/Used lanes:\n";
+ for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ const VRegInfo &Info = VRegInfos[RegIdx];
+ dbgs() << PrintReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n';
+ }
+ dbgs() << "\n";
+ );
+
+ bool Again = false;
+ // Mark operands as dead/unused.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ const VRegInfo &RegInfo = VRegInfos[RegIdx];
+ if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
+ MO.setIsDead();
+ }
+ if (MO.readsReg()) {
+ bool CrossCopy = false;
+ if (isUndefRegAtInput(MO, RegInfo)) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in "
+ << MI);
+ MO.setIsUndef();
+ } else if (isUndefInput(MO, &CrossCopy)) {
+ DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in "
+ << MI);
+ MO.setIsUndef();
+ if (CrossCopy)
+ Again = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Again;
+}
+
+bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
+ // Don't bother if we won't track subregister liveness later. This pass is
+ // required for correctness if subregister liveness is enabled because the
+ // register coalescer cannot deal with hidden dead defs. However without
+ // subregister liveness enabled, the expected benefits of this pass are small
+  // so we save compile time.
+ if (!MF.getSubtarget().enableSubRegLiveness()) {
+ DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
+ return false;
+ }
+
+ MRI = &MF.getRegInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ VRegInfos = new VRegInfo[NumVirtRegs];
+ WorklistMembers.resize(NumVirtRegs);
+ DefinedByCopy.resize(NumVirtRegs);
+
+ bool Again;
+ do {
+ Again = runOnce(MF);
+ } while(Again);
+
+ DefinedByCopy.clear();
+ WorklistMembers.clear();
+ delete[] VRegInfos;
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
new file mode 100644
index 000000000000..eae78a950d9a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -0,0 +1,264 @@
+//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass mulches exception handling code into a form adapted to code
+// generation. Required if using dwarf exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfehprepare"
+
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
+
+namespace {
+ class DwarfEHPrepare : public FunctionPass {
+ const TargetMachine *TM;
+
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ Constant *RewindFunction;
+
+ DominatorTree *DT;
+ const TargetLowering *TLI;
+
+ bool InsertUnwindResumeCalls(Function &Fn);
+ Value *GetExceptionObject(ResumeInst *RI);
+ size_t
+ pruneUnreachableResumes(Function &Fn,
+ SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
+ // practice.
+ DwarfEHPrepare()
+ : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
+
+ DwarfEHPrepare(const TargetMachine *TM)
+ : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
+
+ bool runOnFunction(Function &Fn) override;
+
+ bool doFinalization(Module &M) override {
+ RewindFunction = nullptr;
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override {
+ return "Exception handling preparation";
+ }
+ };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
+ return new DwarfEHPrepare(TM);
+}
+
+void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
+/// GetExceptionObject - Return the exception object from the value passed into
+/// the 'resume' instruction (typically an aggregate). Clean up any dead
+/// instructions, including the 'resume' instruction.
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Value *ExnObj = nullptr;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = nullptr;
+ InsertValueInst *ExcIVI = nullptr;
+ bool EraseIVIs = false;
+
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = ExcIVI->getOperand(1);
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
+ }
+ }
+ }
+
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
+
+ RI->eraseFromParent();
+
+ if (EraseIVIs) {
+ if (SelIVI->use_empty())
+ SelIVI->eraseFromParent();
+ if (ExcIVI->use_empty())
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->use_empty())
+ SelLoad->eraseFromParent();
+ }
+
+ return ExnObj;
+}
+
+/// Replace resumes that are not reachable from a cleanup landing pad with
+/// unreachable and then simplify those blocks.
+size_t DwarfEHPrepare::pruneUnreachableResumes(
+ Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ BitVector ResumeReachable(Resumes.size());
+ size_t ResumeIndex = 0;
+ for (auto *RI : Resumes) {
+ for (auto *LP : CleanupLPads) {
+ if (isPotentiallyReachable(LP, RI, DT)) {
+ ResumeReachable.set(ResumeIndex);
+ break;
+ }
+ }
+ ++ResumeIndex;
+ }
+
+ // If everything is reachable, there is no change.
+ if (ResumeReachable.all())
+ return Resumes.size();
+
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
+ LLVMContext &Ctx = Fn.getContext();
+
+ // Otherwise, insert unreachable instructions and call simplifycfg.
+ size_t ResumesLeft = 0;
+ for (size_t I = 0, E = Resumes.size(); I < E; ++I) {
+ ResumeInst *RI = Resumes[I];
+ if (ResumeReachable[I]) {
+ Resumes[ResumesLeft++] = RI;
+ } else {
+ BasicBlock *BB = RI->getParent();
+ new UnreachableInst(Ctx, RI);
+ RI->eraseFromParent();
+ SimplifyCFG(BB, TTI, 1);
+ }
+ }
+ Resumes.resize(ResumesLeft);
+ return ResumesLeft;
+}
+
+/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
+/// into calls to the appropriate _Unwind_Resume function.
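+///
+/// Editorial sketch (not part of the original source): conceptually, each
+/// surviving
+///
+///   resume { i8*, i32 } %lpad.val
+///
+/// is rewritten into
+///
+///   %exn.obj = extractvalue { i8*, i32 } %lpad.val, 0
+///   call void @_Unwind_Resume(i8* %exn.obj)
+///   unreachable
+///
+/// with the call placed in a shared block fed by a phi when more than one
+/// resume remains.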
+bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
+ SmallVector<ResumeInst*, 16> Resumes;
+ SmallVector<LandingPadInst*, 16> CleanupLPads;
+ for (BasicBlock &BB : Fn) {
+ if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
+ Resumes.push_back(RI);
+ if (auto *LP = BB.getLandingPadInst())
+ if (LP->isCleanup())
+ CleanupLPads.push_back(LP);
+ }
+
+ if (Resumes.empty())
+ return false;
+
+  // Check the personality; don't do anything if it's funclet-based.
+ EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
+ if (isFuncletEHPersonality(Pers))
+ return false;
+
+ LLVMContext &Ctx = Fn.getContext();
+
+ size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ if (ResumesLeft == 0)
+ return true; // We pruned them all.
+
+ // Find the rewind function if we didn't already.
+ if (!RewindFunction) {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+ Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
+ }
+
+ // Create the basic block where the _Unwind_Resume call will live.
+ if (ResumesLeft == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Value *ExnObj = GetExceptionObject(RI);
+
+ // Call the _Unwind_Resume function.
+ CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft,
+ "exn.obj", UnwindBB);
+
+ // Extract the exception object from the ResumeInst and add it to the PHI node
+ // that feeds the _Unwind_Resume call.
+ for (ResumeInst *RI : Resumes) {
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+
+ Value *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
+ }
+
+ // Call the function.
+ CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+ assert(TM && "DWARF EH preparation requires a target machine");
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ bool Changed = InsertUnwindResumeCalls(Fn);
+ DT = nullptr;
+ TLI = nullptr;
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
new file mode 100644
index 000000000000..8c96124451f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -0,0 +1,817 @@
+//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Early if-conversion is for out-of-order CPUs that don't have a lot of
+// predicable instructions. The goal is to eliminate conditional branches that
+// may mispredict.
+//
+// Instructions from both sides of the branch are executed speculatively, and a
+// cmov instruction selects the result.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "early-ifcvt"
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned>
+BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
+ cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
+ cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumDiamondsSeen, "Number of diamonds");
+STATISTIC(NumDiamondsConv, "Number of diamonds converted");
+STATISTIC(NumTrianglesSeen, "Number of triangles");
+STATISTIC(NumTrianglesConv, "Number of triangles converted");
+
+//===----------------------------------------------------------------------===//
+// SSAIfConv
+//===----------------------------------------------------------------------===//
+//
+// The SSAIfConv class performs if-conversion on SSA form machine code after
+// determining if it is possible. The class contains no heuristics; external
+// code should be used to determine when if-conversion is a good idea.
+//
+// SSAIfConv can convert both triangles and diamonds:
+//
+//   Triangle: Head              Diamond: Head
+//                |  \                       /  \_
+//                |   \                     /    |
+//                |  [TF]BB              FBB    TBB
+//                |   /                     \    /
+//                |  /                       \  /
+//              Tail                         Tail
+//
+// Instructions in the conditional blocks TBB and/or FBB are spliced into the
+// Head block, and phis in the Tail block are converted to select instructions.
+//
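+// Editorial sketch (not part of the original source): on a triangle, a tail
+// phi such as
+//
+//   %vreg2<def> = PHI %vreg0, <BB#Head>, %vreg1, <BB#TBB>
+//
+// becomes, once TBB's instructions have been hoisted into Head, a target
+// select inserted before Head's terminator via TargetInstrInfo::insertSelect,
+// choosing %vreg1 when the branch condition holds and %vreg0 otherwise.
+//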
+namespace {
+class SSAIfConv {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+public:
+ /// The block containing the conditional branch.
+ MachineBasicBlock *Head;
+
+ /// The block containing phis after the if-then-else.
+ MachineBasicBlock *Tail;
+
+ /// The 'true' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *TBB;
+
+ /// The 'false' conditional block as determined by AnalyzeBranch.
+ MachineBasicBlock *FBB;
+
+ /// isTriangle - When there is no 'else' block, either TBB or FBB will be
+ /// equal to Tail.
+ bool isTriangle() const { return TBB == Tail || FBB == Tail; }
+
+ /// Returns the Tail predecessor for the True side.
+ MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
+
+ /// Returns the Tail predecessor for the False side.
+ MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
+
+ /// Information about each phi in the Tail block.
+ struct PHIInfo {
+ MachineInstr *PHI;
+ unsigned TReg, FReg;
+ // Latencies from Cond+Branch, TReg, and FReg to DstReg.
+ int CondCycles, TCycles, FCycles;
+
+ PHIInfo(MachineInstr *phi)
+ : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {}
+ };
+
+ SmallVector<PHIInfo, 8> PHIs;
+
+private:
+ /// The branch condition determined by AnalyzeBranch.
+ SmallVector<MachineOperand, 4> Cond;
+
+ /// Instructions in Head that define values used by the conditional blocks.
+ /// The hoisted instructions must be inserted after these instructions.
+ SmallPtrSet<MachineInstr*, 8> InsertAfter;
+
+ /// Register units clobbered by the conditional blocks.
+ BitVector ClobberedRegUnits;
+
+ // Scratch pad for findInsertionPoint.
+ SparseSet<unsigned> LiveRegUnits;
+
+  /// Insertion point in Head for speculatively executed instructions from TBB
+ /// and FBB.
+ MachineBasicBlock::iterator InsertionPoint;
+
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// speculated.
+ bool canSpeculateInstrs(MachineBasicBlock *MBB);
+
+ /// Find a valid insertion point in Head.
+ bool findInsertionPoint();
+
+ /// Replace PHI instructions in Tail with selects.
+ void replacePHIInstrs();
+
+ /// Insert selects and rewrite PHI operands to use them.
+ void rewritePHIOperands();
+
+public:
+ /// runOnMachineFunction - Initialize per-function data structures.
+ void runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ LiveRegUnits.clear();
+ LiveRegUnits.setUniverse(TRI->getNumRegUnits());
+ ClobberedRegUnits.clear();
+ ClobberedRegUnits.resize(TRI->getNumRegUnits());
+ }
+
+ /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
+ /// initialize the internal state, and return true.
+ bool canConvertIf(MachineBasicBlock *MBB);
+
+ /// convertIf - If-convert the last block passed to canConvertIf(), assuming
+ /// it is possible. Add any erased blocks to RemovedBlocks.
+ void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+};
+} // end anonymous namespace
+
+
+/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator(); I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << *I);
+ return false;
+ }
+
+ // Don't speculate loads. Note that it may be possible and desirable to
+ // speculate GOT or constant pool loads that are guaranteed not to trap,
+ // but we don't support that for now.
+ if (I->mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << *I);
+ return false;
+ }
+
+ // We never speculate stores, so an AA pointer isn't necessary.
+ bool DontMoveAcrossStore = true;
+ if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << *I);
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask()) {
+ DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ // Remember clobbered regunits.
+ if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI).second)
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI);
+ if (DefMI->isTerminator()) {
+ DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+
+/// Find an insertion point in Head for the speculated instructions. The
+/// insertion point must be:
+///
+/// 1. Before any terminators.
+/// 2. After any instructions in InsertAfter.
+/// 3. Not have any clobbered regunits live.
+///
+/// This function sets InsertionPoint and returns true when successful, it
+/// returns false if no valid insertion point could be found.
+///
+bool SSAIfConv::findInsertionPoint() {
+ // Keep track of live regunits before the current position.
+ // Only track RegUnits that are also in ClobberedRegUnits.
+ LiveRegUnits.clear();
+ SmallVector<unsigned, 8> Reads;
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ MachineBasicBlock::iterator I = Head->end();
+ MachineBasicBlock::iterator B = Head->begin();
+ while (I != B) {
+ --I;
+    // Some of the conditional code depends on I.
+ if (InsertAfter.count(&*I)) {
+ DEBUG(dbgs() << "Can't insert code after " << *I);
+ return false;
+ }
+
+ // Update live regunits.
+ for (const MachineOperand &MO : I->operands()) {
+ // We're ignoring regmask operands. That is conservatively correct.
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // I clobbers Reg, so it isn't live before I.
+ if (MO.isDef())
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ LiveRegUnits.erase(*Units);
+ // Unless I reads Reg.
+ if (MO.readsReg())
+ Reads.push_back(Reg);
+ }
+ // Anything read by I is live before I.
+ while (!Reads.empty())
+ for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
+ ++Units)
+ if (ClobberedRegUnits.test(*Units))
+ LiveRegUnits.insert(*Units);
+
+ // We can't insert before a terminator.
+ if (I != FirstTerm && I->isTerminator())
+ continue;
+
+ // Some of the clobbered registers are live before I, not a valid insertion
+ // point.
+ if (!LiveRegUnits.empty()) {
+ DEBUG({
+ dbgs() << "Would clobber";
+ for (SparseSet<unsigned>::const_iterator
+ i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i)
+ dbgs() << ' ' << PrintRegUnit(*i, TRI);
+ dbgs() << " live before " << *I;
+ });
+ continue;
+ }
+
+ // This is a valid insertion point.
+ InsertionPoint = I;
+ DEBUG(dbgs() << "Can insert before " << *I);
+ return true;
+ }
+ DEBUG(dbgs() << "No legal insertion point found.\n");
+ return false;
+}
+
+
+
+/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
+/// a potential candidate for if-conversion. Fill out the internal state.
+///
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+ Head = MBB;
+ TBB = FBB = Tail = nullptr;
+
+ if (Head->succ_size() != 2)
+ return false;
+ MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+ MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+ // Canonicalize so Succ0 has MBB as its single predecessor.
+ if (Succ0->pred_size() != 1)
+ std::swap(Succ0, Succ1);
+
+ if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
+ return false;
+
+ Tail = Succ0->succ_begin()[0];
+
+ // This is not a triangle.
+ if (Tail != Succ1) {
+ // Check for a diamond. We won't deal with any critical edges.
+ if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
+ Succ1->succ_begin()[0] != Tail)
+ return false;
+ DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << "/BB#" << Succ1->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+
+ // Live-in physregs are tricky to get right when speculating code.
+ if (!Tail->livein_empty()) {
+ DEBUG(dbgs() << "Tail has live-ins.\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber()
+ << " -> BB#" << Succ0->getNumber()
+ << " -> BB#" << Tail->getNumber() << '\n');
+ }
+
+ // This is a triangle or a diamond.
+ // If Tail doesn't have any phis, there must be side effects.
+ if (Tail->empty() || !Tail->front().isPHI()) {
+ DEBUG(dbgs() << "No phis in tail.\n");
+ return false;
+ }
+
+ // The branch we're looking to eliminate must be analyzable.
+ Cond.clear();
+ if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {
+ DEBUG(dbgs() << "Branch not analyzable.\n");
+ return false;
+ }
+
+ // This is weird, probably some sort of degenerate CFG.
+ if (!TBB) {
+ DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n");
+ return false;
+ }
+
+ // AnalyzeBranch doesn't set FBB on a fall-through branch.
+ // Make sure it is always set.
+ FBB = TBB == Succ0 ? Succ1 : Succ0;
+
+ // Any phis in the tail block must be convertible to selects.
+ PHIs.clear();
+ MachineBasicBlock *TPred = getTPred();
+ MachineBasicBlock *FPred = getFPred();
+ for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
+ I != E && I->isPHI(); ++I) {
+ PHIs.push_back(&*I);
+ PHIInfo &PI = PHIs.back();
+ // Find PHI operands corresponding to TPred and FPred.
+ for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
+ if (PI.PHI->getOperand(i+1).getMBB() == TPred)
+ PI.TReg = PI.PHI->getOperand(i).getReg();
+ if (PI.PHI->getOperand(i+1).getMBB() == FPred)
+ PI.FReg = PI.PHI->getOperand(i).getReg();
+ }
+ assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+
+ // Get target information.
+ if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
+ PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
+ return false;
+ }
+ }
+
+ // Check that the conditional instructions can be speculated.
+ InsertAfter.clear();
+ ClobberedRegUnits.reset();
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+
+ // Try to find a valid insertion point for the speculated instructions in the
+ // head basic block.
+ if (!findInsertionPoint())
+ return false;
+
+ if (isTriangle())
+ ++NumTrianglesSeen;
+ else
+ ++NumDiamondsSeen;
+ return true;
+}
+
+/// replacePHIInstrs - Completely replace PHI instructions with selects.
+/// This is possible when the only Tail predecessors are the if-converted
+/// blocks.
+void SSAIfConv::replacePHIInstrs() {
+ assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ PI.PHI->eraseFromParent();
+ PI.PHI = nullptr;
+ }
+}
+
+/// rewritePHIOperands - When there are additional Tail predecessors, insert
+/// select instructions in Head and rewrite PHI operands to use the selects.
+/// Keep the PHI instructions in Tail to handle the other predecessors.
+void SSAIfConv::rewritePHIOperands() {
+ MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
+ assert(FirstTerm != Head->end() && "No terminators");
+ DebugLoc HeadDL = FirstTerm->getDebugLoc();
+
+ // Convert all PHIs to select instructions inserted before FirstTerm.
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ PHIInfo &PI = PHIs[i];
+ unsigned DstReg = 0;
+
+ DEBUG(dbgs() << "If-converting " << *PI.PHI);
+ if (PI.TReg == PI.FReg) {
+ // We do not need the select instruction if both incoming values are
+ // equal.
+ DstReg = PI.TReg;
+ } else {
+ unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
+ TII->insertSelect(*Head, FirstTerm, HeadDL,
+ DstReg, Cond, PI.TReg, PI.FReg);
+ DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
+ }
+
+ // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
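+    // Walk the operand list from the end so removing a (Reg, MBB) pair does
+    // not shift the pairs still to be visited.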
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
+ MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
+ if (MBB == getTPred()) {
+ PI.PHI->getOperand(i-1).setMBB(Head);
+ PI.PHI->getOperand(i-2).setReg(DstReg);
+ } else if (MBB == getFPred()) {
+ PI.PHI->RemoveOperand(i-1);
+ PI.PHI->RemoveOperand(i-2);
+ }
+ }
+ DEBUG(dbgs() << " --> " << *PI.PHI);
+ }
+}
+
+/// convertIf - Execute the if conversion after canConvertIf has determined the
+/// feasibility.
+///
+/// Any basic blocks erased will be added to RemovedBlocks.
+///
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+ assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
+
+ // Update statistics.
+ if (isTriangle())
+ ++NumTrianglesConv;
+ else
+ ++NumDiamondsConv;
+
+ // Move all instructions into Head, except for the terminators.
+ if (TBB != Tail)
+ Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
+ if (FBB != Tail)
+ Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
+
+ // Are there extra Tail predecessors?
+ bool ExtraPreds = Tail->pred_size() != 2;
+ if (ExtraPreds)
+ rewritePHIOperands();
+ else
+ replacePHIInstrs();
+
+ // Fix up the CFG, temporarily leave Head without any successors.
+ Head->removeSuccessor(TBB);
+ Head->removeSuccessor(FBB, true);
+ if (TBB != Tail)
+ TBB->removeSuccessor(Tail, true);
+ if (FBB != Tail)
+ FBB->removeSuccessor(Tail, true);
+
+ // Fix up Head's terminators.
+ // It should become a single branch or a fallthrough.
+ DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
+ TII->RemoveBranch(*Head);
+
+ // Erase the now empty conditional blocks. It is likely that Head can fall
+ // through to Tail, and we can join the two blocks.
+ if (TBB != Tail) {
+ RemovedBlocks.push_back(TBB);
+ TBB->eraseFromParent();
+ }
+ if (FBB != Tail) {
+ RemovedBlocks.push_back(FBB);
+ FBB->eraseFromParent();
+ }
+
+ assert(Head->succ_empty() && "Additional head successors?");
+ if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
+ // Splice Tail onto the end of Head.
+ DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber()
+ << " into head BB#" << Head->getNumber() << '\n');
+ Head->splice(Head->end(), Tail,
+ Tail->begin(), Tail->end());
+ Head->transferSuccessorsAndUpdatePHIs(Tail);
+ RemovedBlocks.push_back(Tail);
+ Tail->eraseFromParent();
+ } else {
+ // We need a branch to Tail, let code placement work it out later.
+ DEBUG(dbgs() << "Converting to unconditional branch.\n");
+ SmallVector<MachineOperand, 0> EmptyCond;
+ TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
+ Head->addSuccessor(Tail);
+ }
+ DEBUG(dbgs() << *Head);
+}
+
+
+//===----------------------------------------------------------------------===//
+// EarlyIfConverter Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfConverter : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfConverter() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override { return "Early If-Conversion"; }
+
+private:
+ bool tryConvertIf(MachineBasicBlock*);
+ void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
+ void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
+ void invalidateTraces();
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+char EarlyIfConverter::ID = 0;
+char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(EarlyIfConverter,
+ "early-ifcvt", "Early If Converter", false, false)
+
+void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+  // convertIf can remove TBB and FBB, and Tail can be merged into Head.
+ // TBB and FBB should not dominate any blocks.
+ // Tail children should be transferred to Head.
+ MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ assert(Node != HeadNode && "Cannot erase the head node");
+ while (Node->getNumChildren()) {
+ assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+ DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+ }
+ DomTree->eraseNode(Removed[i]);
+ }
+}
+
+/// Update LoopInfo after if-conversion.
+void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+ if (!Loops)
+ return;
+ // If-conversion doesn't change loop structure, and it doesn't mess with back
+ // edges, so updating LoopInfo is simply removing the dead blocks.
+ for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+ Loops->removeBlock(Removed[i]);
+}
+
+/// Invalidate MachineTraceMetrics before if-conversion.
+void EarlyIfConverter::invalidateTraces() {
+ Traces->verifyAnalysis();
+ Traces->invalidate(IfConv.Head);
+ Traces->invalidate(IfConv.Tail);
+ Traces->invalidate(IfConv.TBB);
+ Traces->invalidate(IfConv.FBB);
+ Traces->verifyAnalysis();
+}
+
+// Adjust cycles with downward saturation.
+static unsigned adjCycles(unsigned Cyc, int Delta) {
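+  // A negative Delta would wrap the unsigned addition; clamp the result at
+  // zero instead of underflowing.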
+ if (Delta < 0 && Cyc + Delta > Cyc)
+ return 0;
+ return Cyc + Delta;
+}
+
+/// Apply cost model and heuristics to the if-conversion in IfConv.
+/// Return true if the conversion is a good idea.
+///
+bool EarlyIfConverter::shouldConvertIf() {
+ // Stress testing mode disables all cost considerations.
+ if (Stress)
+ return true;
+
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+ MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
+ MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
+ DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
+ unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
+ FBBTrace.getCriticalPath());
+
+ // Set a somewhat arbitrary limit on the critical path extension we accept.
+ unsigned CritLimit = SchedModel.MispredictPenalty/2;
+
+ // If-conversion only makes sense when there is unexploited ILP. Compute the
+ // maximum-ILP resource length of the trace after if-conversion. Compare it
+ // to the shortest critical path.
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
+ if (IfConv.TBB != IfConv.Tail)
+ ExtraBlocks.push_back(IfConv.TBB);
+ unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
+ DEBUG(dbgs() << "Resource length " << ResLength
+ << ", minimal critical path " << MinCrit << '\n');
+ if (ResLength > MinCrit + CritLimit) {
+ DEBUG(dbgs() << "Not enough available ILP.\n");
+ return false;
+ }
+
+ // Assume that the depth of the first head terminator will also be the depth
+ // of the select instruction inserted, as determined by the flag dependency.
+ // TBB / FBB data dependencies may delay the select even more.
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
+ unsigned BranchDepth =
+ HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;
+ DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
+
+ // Look at all the tail phis, and compute the critical path extension caused
+ // by inserting select instructions.
+ MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
+ SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
+ unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
+ unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
+ DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
+
+ // The condition is pulled into the critical path.
+ unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
+ if (CondDepth > MaxDepth) {
+ unsigned Extra = CondDepth - MaxDepth;
+ DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The TBB value is pulled into the critical path.
+ unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);
+ if (TDepth > MaxDepth) {
+ unsigned Extra = TDepth - MaxDepth;
+ DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+
+ // The FBB value is pulled into the critical path.
+ unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);
+ if (FDepth > MaxDepth) {
+ unsigned Extra = FDepth - MaxDepth;
+ DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > CritLimit) {
+ DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ invalidateTraces();
+ SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks);
+ Changed = true;
+ updateDomTree(RemovedBlocks);
+ updateLoops(RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+  // Only run if-conversion if the target wants it.
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ if (!STI.enableEarlyIfConversion())
+ return false;
+
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
+ MRI = &MF.getRegInfo();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = nullptr;
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (auto DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..aea7c31ba316
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,97 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+                /* cfg = */ true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->getNumBlockIDs());
+
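+  // Each block N owns two equivalence classes: 2*N for its ingoing edges and
+  // 2*N+1 for its outgoing edges. Joining them across CFG edges forms the
+  // bundles.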
+ for (const auto &MBB : *MF) {
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+
+ // Compute the reverse mapping.
+ Blocks.clear();
+ Blocks.resize(getNumBundles());
+
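+  // Each block is listed under every bundle it touches, once per distinct
+  // bundle.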
+ for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
+ unsigned b0 = getBundle(i, 0);
+ unsigned b1 = getBundle(i, 1);
+ Blocks[b0].push_back(i);
+ if (b1 != b0)
+ Blocks[b1].push_back(i);
+ }
+
+ return false;
+}
+
+/// Specialize WriteGraph, the standard implementation won't work.
+namespace llvm {
+template<>
+raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const Twine &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (const auto &MBB : *MF) {
+ unsigned BB = MBB.getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
+}
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
new file mode 100644
index 000000000000..566b8d507b2b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -0,0 +1,811 @@
+//===- ExecutionDepsFix.cpp - Fix execution dependency issues ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the execution dependency fix pass.
+//
+// Some X86 SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer cpus there is extra latency
+// transferring data between integer and floating point domains. ARM cores
+// have similar issues when they are configured with both VFP and NEON
+// pipelines.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "execution-fix"
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+namespace {
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+ // Bitmask of available domains. For an open DomainValue, it is the still
+ // possible domains for collapsing. For a collapsed DomainValue it is the
+ // domains where the register is available for free.
+ unsigned AvailableDomains;
+
+ // Pointer to the next DomainValue in a chain. When two DomainValues are
+ // merged, Victim.Next is set to point to Victor, so old DomainValue
+ // references can be updated by following the chain.
+ DomainValue *Next;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+ // A collapsed DomainValue has no instructions to twiddle - it simply keeps
+ // track of the domains where the registers are already available.
+ bool isCollapsed() const { return Instrs.empty(); }
+
+ // Is domain available?
+ bool hasDomain(unsigned domain) const {
+ assert(domain <
+ static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+ "undefined behavior");
+ return AvailableDomains & (1u << domain);
+ }
+
+ // Mark domain as available.
+ void addDomain(unsigned domain) {
+ AvailableDomains |= 1u << domain;
+ }
+
+ // Restrict to a single domain available.
+ void setSingleDomain(unsigned domain) {
+ AvailableDomains = 1u << domain;
+ }
+
+ // Return bitmask of domains that are available and in mask.
+ unsigned getCommonDomains(unsigned mask) const {
+ return AvailableDomains & mask;
+ }
+
+ // First domain available.
+ unsigned getFirstDomain() const {
+ return countTrailingZeros(AvailableDomains);
+ }
+
+ DomainValue() : Refs(0) { clear(); }
+
+  // Clear this DomainValue; after a merge, Next is re-pointed at the value
+  // that now holds its data.
+ void clear() {
+ AvailableDomains = 0;
+ Next = nullptr;
+ Instrs.clear();
+ }
+};
+}
+
+namespace {
+/// Information about a live register.
+struct LiveReg {
+ /// Value currently in this register, or NULL when no value is being tracked.
+ /// This counts as a DomainValue reference.
+ DomainValue *Value;
+
+ /// Instruction that defined this register, relative to the beginning of the
+ /// current basic block. When a LiveReg is used to represent a live-out
+ /// register, this value is relative to the end of the basic block, so it
+ /// will be a negative number.
+ int Def;
+};
+} // anonymous namespace
+
+namespace {
+class ExeDepsFix : public MachineFunctionPass {
+ static char ID;
+ SpecificBumpPtrAllocator<DomainValue> Allocator;
+ SmallVector<DomainValue*,16> Avail;
+
+ const TargetRegisterClass *const RC;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ std::vector<SmallVector<int, 1>> AliasMap;
+ const unsigned NumRegs;
+ LiveReg *LiveRegs;
+ typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
+ LiveOutMap LiveOuts;
+
+ /// List of undefined register reads in this block in forward order.
+ std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
+
+ /// Storage for register unit liveness.
+ LivePhysRegs LiveRegSet;
+
+ /// Current instruction number.
+ /// The first instruction in each basic block is 0.
+ int CurInstr;
+
+ /// True when the current block has a predecessor that hasn't been visited
+ /// yet.
+ bool SeenUnknownBackEdge;
+
+public:
+ ExeDepsFix(const TargetRegisterClass *rc)
+ : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ const char *getPassName() const override {
+ return "Execution dependency fix";
+ }
+
+private:
+ iterator_range<SmallVectorImpl<int>::const_iterator>
+ regIndices(unsigned Reg) const;
+
+ // DomainValue allocation.
+ DomainValue *alloc(int domain = -1);
+ DomainValue *retain(DomainValue *DV) {
+ if (DV) ++DV->Refs;
+ return DV;
+ }
+ void release(DomainValue*);
+ DomainValue *resolve(DomainValue*&);
+
+ // LiveRegs manipulations.
+ void setLiveReg(int rx, DomainValue *DV);
+ void kill(int rx);
+ void force(int rx, unsigned domain);
+ void collapse(DomainValue *dv, unsigned domain);
+ bool merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock(MachineBasicBlock*);
+ void leaveBasicBlock(MachineBasicBlock*);
+ void visitInstr(MachineInstr*);
+ void processDefs(MachineInstr*, bool Kill);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+ bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
+ void processUndefReads(MachineBasicBlock*);
+};
+}
+
+char ExeDepsFix::ID = 0;
+
+/// Translate TRI register number to a list of indices into our smaller tables
+/// of interesting registers.
+iterator_range<SmallVectorImpl<int>::const_iterator>
+ExeDepsFix::regIndices(unsigned Reg) const {
+ assert(Reg < AliasMap.size() && "Invalid register");
+ const auto &Entry = AliasMap[Reg];
+ return make_range(Entry.begin(), Entry.end());
+}
+
+DomainValue *ExeDepsFix::alloc(int domain) {
+ DomainValue *dv = Avail.empty() ?
+ new(Allocator.Allocate()) DomainValue :
+ Avail.pop_back_val();
+ if (domain >= 0)
+ dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
+ return dv;
+}
+
+/// Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+/// Follow the chain of dead DomainValues until a live DomainValue is reached.
+/// Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+
+ if (LiveRegs[rx].Value == dv)
+ return;
+ if (LiveRegs[rx].Value)
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = retain(dv);
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void ExeDepsFix::kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+ if (!LiveRegs[rx].Value)
+ return;
+
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = nullptr;
+}
+
+/// Force register rx into domain.
+void ExeDepsFix::force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ assert(LiveRegs && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx].Value) {
+ if (dv->isCollapsed())
+ dv->addDomain(domain);
+ else if (dv->hasDomain(domain))
+ collapse(dv, domain);
+ else {
+ // This is an incompatible open DomainValue. Collapse it to whatever and
+ // force the new value into domain. This costs a domain crossing.
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx].Value && "Not live after collapse?");
+ LiveRegs[rx].Value->addDomain(domain);
+ }
+ } else {
+ // Set up basic collapsed DomainValue.
+ setLiveReg(rx, alloc(domain));
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->hasDomain(domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty())
+ TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain);
+ dv->setSingleDomain(domain);
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1)
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx].Value == dv)
+ setLiveReg(rx, alloc(domain));
+}
+
+/// All instructions and registers in B are moved to A, and B is released.
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+ assert(!A->isCollapsed() && "Cannot merge into collapsed");
+ assert(!B->isCollapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ // Restrict to the domains that A and B have in common.
+ unsigned common = A->getCommonDomains(B->AvailableDomains);
+ if (!common)
+ return false;
+ A->AvailableDomains = common;
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+  // Redirect all existing references to B to A via the Next chain.
+ B->Next = retain(A);
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ assert(LiveRegs && "no space allocated for live registers");
+ if (LiveRegs[rx].Value == B)
+ setLiveReg(rx, A);
+ }
+ return true;
+}
+
+/// Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ SeenUnknownBackEdge = false;
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up UndefReads to track undefined register reads.
+ UndefReads.clear();
+ LiveRegSet.clear();
+
+ // Set up LiveRegs to represent registers entering MBB.
+ if (!LiveRegs)
+ LiveRegs = new LiveReg[NumRegs];
+
+ // Default values are 'nothing happened a long time ago'.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Value = nullptr;
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (const auto &LI : MBB->liveins()) {
+ for (int rx : regIndices(LI.PhysReg)) {
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) {
+ SeenUnknownBackEdge = true;
+ continue;
+ }
+ assert(fi->second && "Can't have NULL entries");
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+
+ DomainValue *pdv = resolve(fi->second[rx].Value);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx].Value) {
+ setLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx].Value->isCollapsed()) {
+ // We are already collapsed, but predecessor is not. Force it.
+ unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->isCollapsed())
+ merge(LiveRegs[rx].Value, pdv);
+ else
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+}
+
+void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(LiveRegs && "Must enter basic block first.");
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ // Also use LiveOuts as a visited set to detect back-edges.
+ bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
+
+ if (First) {
+ // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
+ // the end of this block instead of the beginning.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ } else {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i].Value);
+ delete[] LiveRegs;
+ }
+ LiveRegs = nullptr;
+}
+
+void ExeDepsFix::visitInstr(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return;
+
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ // Process defs to track register ages, and kill values clobbered by generic
+ // instructions.
+ processDefs(MI, !DomP.first);
+}
+
+/// \brief Return true if it makes sense to break dependence on a partial def
+/// or undef use.
+bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ unsigned reg = MI->getOperand(OpIdx).getReg();
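+  // Breaking only pays off if every alias index lacks the preferred
+  // clearance; one alias with enough clearance, or an unresolved back-edge
+  // def, keeps the dependence.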
+ for (int rx : regIndices(reg)) {
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ continue;
+ }
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+      DEBUG(dbgs() << ": OK.\n");
+ return false;
+ }
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ return false;
+ }
+ return true;
+}
+
+// Update def-ages for registers defined by MI.
+// If Kill is set, also kill off DomainValues clobbered by the defs.
+//
+// Also break dependencies on partial defs and undef uses.
+void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugValue() && "Won't process debug values");
+
+ // Break dependence on undef uses. Do this before updating LiveRegs below.
+ unsigned OpNum;
+ unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
+ if (Pref) {
+ if (shouldBreakDependence(MI, OpNum, Pref))
+ UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
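+    // Implicit operands follow the explicit ones, so the first implicit
+    // operand ends the scan.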
+ if (MO.isImplicit())
+ break;
+ if (MO.isUse())
+ continue;
+ for (int rx : regIndices(MO.getReg())) {
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // Check clearance before partial register updates.
+ // Call breakDependence before setting LiveRegs[rx].Def.
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(*MI, i, TRI);
+
+ // How many instructions since rx was last written?
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+ }
+ }
+ ++CurInstr;
+}
+
+/// \brief Break false dependencies on undefined register reads.
+///
+/// Walk the block backward computing precise liveness. This is expensive, so we
+/// only do it on demand. Note that the occurrence of undefined register reads
+/// that should be broken is very rare, but when they occur we may have many in
+/// a single block.
+void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
+ if (UndefReads.empty())
+ return;
+
+ // Collect this block's live out register units.
+ LiveRegSet.init(TRI);
+ // We do not need to care about pristine registers as they are just preserved
+ // but not actually used in the function.
+ LiveRegSet.addLiveOutsNoPristines(*MBB);
+
+ MachineInstr *UndefMI = UndefReads.back().first;
+ unsigned OpIdx = UndefReads.back().second;
+
+ for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
+ // Update liveness, including the current instruction's defs.
+ LiveRegSet.stepBackward(I);
+
+ if (UndefMI == &I) {
+ if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
+ TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI);
+
+ UndefReads.pop_back();
+ if (UndefReads.empty())
+ return;
+
+ UndefMI = UndefReads.back().first;
+ OpIdx = UndefReads.back().second;
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ for (int rx : regIndices(mo.getReg())) {
+ force(rx, domain);
+ }
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ for (int rx : regIndices(mo.getReg())) {
+ kill(rx);
+ force(rx, domain);
+ }
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Bitmask of available domains for this instruction after taking collapsed
+ // operands into account.
+ unsigned available = mask;
+
+ // Scan the explicit use operands for incoming domains.
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ for (int rx : regIndices(mo.getReg())) {
+ DomainValue *dv = LiveRegs[rx].Value;
+ if (dv == nullptr)
+ continue;
+ // Bitmask of domains that dv and available have in common.
+ unsigned common = dv->getCommonDomains(available);
+ // Is it possible to use this collapsed register for free?
+ if (dv->isCollapsed()) {
+ // Restrict available domains to the ones in common with the operand.
+ // If there are no common domains, we must pay the cross-domain
+ // penalty for this operand.
+ if (common) available = common;
+ } else if (common)
+ // Open DomainValue is compatible, save it for merging.
+ used.push_back(rx);
+ else
+ // Open DomainValue is not compatible with instruction. It is useless
+ // now.
+ kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(available)) {
+ unsigned domain = countTrailingZeros(available);
+ TII->setExecutionDomain(*mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match available, and build a list of
+ // incoming DomainValues that we want to merge.
+ SmallVector<LiveReg, 4> Regs;
+ for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ assert(LiveRegs && "no space allocated for live registers");
+ const LiveReg &LR = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
+ continue;
+ }
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
+ }
+ }
+ if (!Inserted)
+ Regs.push_back(LR);
+ }
+
+  // Regs is now sorted by def order, earliest first. Try to merge them all,
+  // giving priority to the latest ones.
+ DomainValue *dv = nullptr;
+ while (!Regs.empty()) {
+ if (!dv) {
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
+ continue;
+ }
+
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
+
+ // If latest didn't merge, it is useless now. Kill all registers using it.
+ for (int i : used) {
+ assert(LiveRegs && "no space allocated for live registers");
+ if (LiveRegs[i].Value == Latest)
+ kill(i);
+ }
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv. We must iterate through
+  // all the operands, including imp-def ones.
+ for (MachineInstr::mop_iterator ii = mi->operands_begin(),
+ ee = mi->operands_end();
+ ii != ee; ++ii) {
+ MachineOperand &mo = *ii;
+ if (!mo.isReg()) continue;
+ for (int rx : regIndices(mo.getReg())) {
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
+ }
+ }
+ }
+}
+
+bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+ MF = &mf;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ LiveRegs = nullptr;
+ assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << TRI->getRegClassName(RC) << " **********\n");
+
+ // If no relevant registers are used in the function, we can skip it
+ // completely.
+ bool anyregs = false;
+ const MachineRegisterInfo &MRI = mf.getRegInfo();
+ for (unsigned Reg : *RC) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ anyregs = true;
+ break;
+ }
+ }
+ if (!anyregs) return false;
+
+ // Initialize the AliasMap on the first use.
+ if (AliasMap.empty()) {
+ // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and
+ // therefore the LiveRegs array.
+ AliasMap.resize(TRI->getNumRegs());
+ for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
+ for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true);
+ AI.isValid(); ++AI)
+ AliasMap[*AI].push_back(i);
+ }
+
+ MachineBasicBlock *Entry = &*MF->begin();
+ ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ enterBasicBlock(MBB);
+ if (SeenUnknownBackEdge)
+ Loops.push_back(MBB);
+ for (MachineInstr &MI : *MBB)
+ visitInstr(&MI);
+ processUndefReads(MBB);
+ leaveBasicBlock(MBB);
+ }
+
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (MachineBasicBlock *MBB : Loops) {
+ enterBasicBlock(MBB);
+ for (MachineInstr &MI : *MBB)
+ if (!MI.isDebugValue())
+ processDefs(&MI, false);
+ processUndefReads(MBB);
+ leaveBasicBlock(MBB);
+ }
+
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end() || !FI->second)
+ continue;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (FI->second[i].Value)
+ release(FI->second[i].Value);
+ delete[] FI->second;
+ }
+ LiveOuts.clear();
+ UndefReads.clear();
+ Avail.clear();
+ Allocator.DestroyAll();
+
+ return false;
+}
+
+FunctionPass *
+llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
+ return new ExeDepsFix(RC);
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 000000000000..0ec79c2e69f9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,74 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand ISel Pseudo-instructions", false, false)
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr &MI = *MBBI++;
+
+ // If MI is a pseudo, expand it.
+ if (MI.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB->getIterator();
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
new file mode 100644
index 000000000000..ab2382e2db6d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -0,0 +1,223 @@
+//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo
+// instructions after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "postrapseudos"
+
+namespace {
+struct ExpandPostRA : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandPostRA() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// runOnMachineFunction - pass entry point
+ bool runOnMachineFunction(MachineFunction&) override;
+
+private:
+ bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
+
+ void TransferImplicitOperands(MachineInstr *MI);
+};
+} // end anonymous namespace
+
+char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
+
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
+
+/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit
+/// operands from MI to the replacement instruction.
+void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (const MachineOperand &MO : MI->implicit_operands())
+ if (MO.isReg())
+ CopyMI->addOperand(MO);
+}
+
+bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
+ MachineBasicBlock *MBB = MI->getParent();
+ assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+ MI->getOperand(1).isImm() &&
+ (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+ MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned InsReg = MI->getOperand(2).getReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+ unsigned SubIdx = MI->getOperand(3).getImm();
+
+ assert(SubIdx != 0 && "Invalid index for insert_subreg");
+ unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Insert destination must be in a physical register");
+ assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ "Inserted value must be in a physical register");
+
+ DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+
+ if (DstSubReg == InsReg) {
+ // No need to insert an identity copy instruction.
+    // Watch out for a case like this:
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+      DEBUG(dbgs() << "subreg: replaced by: " << *MI);
+ return true;
+ }
+ DEBUG(dbgs() << "subreg: eliminated!");
+ } else {
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
+
+ // Implicitly define DstReg for subsequent uses.
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+ CopyMI->addRegisterDefined(DstReg);
+ DEBUG(dbgs() << "subreg: " << *CopyMI);
+ }
+
+ DEBUG(dbgs() << '\n');
+ MBB->erase(MI);
+ return true;
+}
+
+bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
+
+ if (MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "dead copy: " << *MI);
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
+ return true;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
+ }
+
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+ if (MI->getNumOperands() > 2)
+ TransferImplicitOperands(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
+ return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+///
+bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Machine Function\n"
+ << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool MadeChange = false;
+
+ for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end();
+ mbbi != mbbe; ++mbbi) {
+ for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
+ mi != me;) {
+ MachineInstr &MI = *mi;
+ // Advance iterator here because MI may be erased.
+ ++mi;
+
+ // Only expand pseudos.
+ if (!MI.isPseudo())
+ continue;
+
+ // Give targets a chance to expand even standard pseudos.
+ if (TII->expandPostRAPseudo(MI)) {
+ MadeChange = true;
+ continue;
+ }
+
+ // Expand standard pseudos.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::SUBREG_TO_REG:
+ MadeChange |= LowerSubregToReg(&MI);
+ break;
+ case TargetOpcode::COPY:
+ MadeChange |= LowerCopy(&MI);
+ break;
+ case TargetOpcode::DBG_VALUE:
+ continue;
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::EXTRACT_SUBREG:
+ llvm_unreachable("Sub-register pseudos should have been eliminated.");
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
new file mode 100644
index 000000000000..2acafafdb9fc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp
@@ -0,0 +1,150 @@
+//===---------------------------- FaultMaps.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FaultMaps.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "faultmaps"
+
+static const int FaultMapVersion = 1;
+const char *FaultMaps::WFMP = "Fault Maps: ";
+
+FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {}
+
+void FaultMaps::recordFaultingOp(FaultKind FaultTy,
+ const MCSymbol *HandlerLabel) {
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCSymbol *FaultingLabel = OutContext.createTempSymbol();
+
+ AP.OutStreamer->EmitLabel(FaultingLabel);
+
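+  // Both table entries are recorded as offsets from the enclosing
+  // function's symbol.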
+ const MCExpr *FaultingOffset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(FaultingLabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ const MCExpr *HandlerOffset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(HandlerLabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ FunctionInfos[AP.CurrentFnSym].emplace_back(FaultTy, FaultingOffset,
+ HandlerOffset);
+}
+
+void FaultMaps::serializeToFaultMapSection() {
+ if (FunctionInfos.empty())
+ return;
+
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCStreamer &OS = *AP.OutStreamer;
+
+ // Create the section.
+ MCSection *FaultMapSection =
+ OutContext.getObjectFileInfo()->getFaultMapSection();
+ OS.SwitchSection(FaultMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
+
+ DEBUG(dbgs() << "********** Fault Map Output **********\n");
+
+ // Header
+ OS.EmitIntValue(FaultMapVersion, 1); // Version.
+ OS.EmitIntValue(0, 1); // Reserved.
+ OS.EmitIntValue(0, 2); // Reserved.
+
+ DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n");
+ OS.EmitIntValue(FunctionInfos.size(), 4);
+
+ DEBUG(dbgs() << WFMP << "functions:\n");
+
+ for (const auto &FFI : FunctionInfos)
+ emitFunctionInfo(FFI.first, FFI.second);
+}
+
+void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
+ const FunctionFaultInfos &FFI) {
+ MCStreamer &OS = *AP.OutStreamer;
+
+ DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n");
+ OS.EmitSymbolValue(FnLabel, 8);
+
+ DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n");
+ OS.EmitIntValue(FFI.size(), 4);
+
+ OS.EmitIntValue(0, 4); // Reserved
+
+ for (auto &Fault : FFI) {
+ DEBUG(dbgs() << WFMP << " fault type: "
+ << faultTypeToString(Fault.Kind) << "\n");
+ OS.EmitIntValue(Fault.Kind, 4);
+
+ DEBUG(dbgs() << WFMP << " faulting PC offset: "
+ << *Fault.FaultingOffsetExpr << "\n");
+ OS.EmitValue(Fault.FaultingOffsetExpr, 4);
+
+ DEBUG(dbgs() << WFMP << " fault handler PC offset: "
+ << *Fault.HandlerOffsetExpr << "\n");
+ OS.EmitValue(Fault.HandlerOffsetExpr, 4);
+ }
+}
+
+
+const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
+ switch (FT) {
+ default:
+ llvm_unreachable("unhandled fault type!");
+
+ case FaultMaps::FaultingLoad:
+ return "FaultingLoad";
+ }
+}
+
+raw_ostream &llvm::
+operator<<(raw_ostream &OS,
+ const FaultMapParser::FunctionFaultInfoAccessor &FFI) {
+ OS << "Fault kind: "
+ << FaultMaps::faultTypeToString((FaultMaps::FaultKind)FFI.getFaultKind())
+ << ", faulting PC offset: " << FFI.getFaultingPCOffset()
+ << ", handling PC offset: " << FFI.getHandlerPCOffset();
+ return OS;
+}
+
+raw_ostream &llvm::
+operator<<(raw_ostream &OS, const FaultMapParser::FunctionInfoAccessor &FI) {
+ OS << "FunctionAddress: " << format_hex(FI.getFunctionAddr(), 8)
+ << ", NumFaultingPCs: " << FI.getNumFaultingPCs() << "\n";
+ for (unsigned i = 0, e = FI.getNumFaultingPCs(); i != e; ++i)
+ OS << FI.getFunctionFaultInfoAt(i) << "\n";
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const FaultMapParser &FMP) {
+ OS << "Version: " << format_hex(FMP.getFaultMapVersion(), 2) << "\n";
+ OS << "NumFunctions: " << FMP.getNumFunctions() << "\n";
+
+ if (FMP.getNumFunctions() == 0)
+ return OS;
+
+ FaultMapParser::FunctionInfoAccessor FI;
+
+ for (unsigned i = 0, e = FMP.getNumFunctions(); i != e; ++i) {
+ FI = (i == 0) ? FMP.getFirstFunctionInfo() : FI.getNextFunctionInfo();
+ OS << FI;
+ }
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
new file mode 100644
index 000000000000..b16f81c728d0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -0,0 +1,59 @@
+//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations which result in
+// funclets being contiguous.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "funclet-layout"
+
+namespace {
+class FuncletLayout : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ FuncletLayout() : MachineFunctionPass(ID) {
+ initializeFuncletLayoutPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+};
+}
+
+char FuncletLayout::ID = 0;
+char &llvm::FuncletLayoutID = FuncletLayout::ID;
+INITIALIZE_PASS(FuncletLayout, "funclet-layout",
+ "Contiguously Lay Out Funclets", false, false)
+
+bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
+ DenseMap<const MachineBasicBlock *, int> FuncletMembership =
+ getFuncletMembership(F);
+ if (FuncletMembership.empty())
+ return false;
+
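+ // Sorting the blocks by funclet number groups all blocks belonging to the
+ // same funclet next to each other, which is what makes each funclet
+ // contiguous in the final layout.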
+ F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto FuncletX = FuncletMembership.find(&X);
+ auto FuncletY = FuncletMembership.find(&Y);
+ assert(FuncletX != FuncletMembership.end());
+ assert(FuncletY != FuncletMembership.end());
+ return FuncletX->second < FuncletY->second;
+ });
+
+ // Conservatively assume we changed something.
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
new file mode 100644
index 000000000000..c8116a453d2d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -0,0 +1,177 @@
+//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the GCFunctionInfo class and GCModuleInfo pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class Printer : public FunctionPass {
+ static char ID;
+ raw_ostream &OS;
+
+public:
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
+
+ const char *getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+};
+}
+
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false)
+
+// -----------------------------------------------------------------------------
+
+GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S)
+ : F(F), S(S), FrameSize(~0LL) {}
+
+GCFunctionInfo::~GCFunctionInfo() {}
+
+// -----------------------------------------------------------------------------
+
+char GCModuleInfo::ID = 0;
+
+GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
+ assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+ assert(F.hasGC());
+
+ finfo_map_type::iterator I = FInfoMap.find(&F);
+ if (I != FInfoMap.end())
+ return *I->second;
+
+ GCStrategy *S = getGCStrategy(F.getGC());
+ Functions.push_back(make_unique<GCFunctionInfo>(F, *S));
+ GCFunctionInfo *GFI = Functions.back().get();
+ FInfoMap[&F] = GFI;
+ return *GFI;
+}
+
+void GCModuleInfo::clear() {
+ Functions.clear();
+ FInfoMap.clear();
+ GCStrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+ return new Printer(OS);
+}
+
+const char *Printer::getPassName() const {
+ return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+ switch (Kind) {
+ case GC::PreCall:
+ return "pre-call";
+ case GC::PostCall:
+ return "post-call";
+ }
+ llvm_unreachable("Invalid point kind");
+}
+
+bool Printer::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+ RE = FD->roots_end();
+ RI != RE; ++RI)
+ OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
+ for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE;
+ ++PI) {
+
+ OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind)
+ << ", live = {";
+
+ for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+ RE = FD->live_end(PI);
+ RI != RE;) {
+ OS << " " << RI->Num;
+ if (++RI == RE)
+ break;
+ OS << ",";
+ }
+
+ OS << " }\n";
+ }
+
+ return false;
+}
+
+bool Printer::doFinalization(Module &M) {
+ GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(GMI && "Printer didn't require GCModuleInfo?!");
+ GMI->clear();
+ return false;
+}
+
+GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
+ // TODO: Arguably, just doing a linear search would be faster for small N
+ auto NMI = GCStrategyMap.find(Name);
+ if (NMI != GCStrategyMap.end())
+ return NMI->getValue();
+
+ for (auto& Entry : GCRegistry::entries()) {
+ if (Name == Entry.getName()) {
+ std::unique_ptr<GCStrategy> S = Entry.instantiate();
+ S->Name = Name;
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
+ }
+ }
+
+ if (GCRegistry::begin() == GCRegistry::end()) {
+ // In normal operation, the registry should not be empty. There should
+ // be the builtin GCs if nothing else. The most likely scenario here is
+ // that we got here without running the initializers used by the Registry
+ // itself and its registration mechanism.
+ const std::string error = ("unsupported GC: " + Name).str() +
+ " (did you remember to link and initialize the CodeGen library?)";
+ report_fatal_error(error);
+ } else
+ report_fatal_error(std::string("unsupported GC: ") + Name);
+}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 000000000000..bb8cfa1cc809
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,19 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+using namespace llvm;
+
+GCMetadataPrinter::GCMetadataPrinter() {}
+
+GCMetadataPrinter::~GCMetadataPrinter() {}
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
new file mode 100644
index 000000000000..326adab2ba64
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -0,0 +1,355 @@
+//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lowering for the gc.root mechanism.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
+/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+/// directed by the GCStrategy. It also performs automatic root initialization
+/// and custom intrinsic lowering.
+class LowerIntrinsics : public FunctionPass {
+ bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
+
+public:
+ static char ID;
+
+ LowerIntrinsics();
+ const char *getPassName() const override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+};
+
+/// GCMachineCodeAnalysis - This is a target-independent pass over the machine
+/// function representation to identify safe points for the garbage collector
+/// in the machine code. It inserts labels at safe points and populates a
+/// GCMetadata record for each function.
+class GCMachineCodeAnalysis : public MachineFunctionPass {
+ GCFunctionInfo *FI;
+ MachineModuleInfo *MMI;
+ const TargetInstrInfo *TII;
+
+ void FindSafePoints(MachineFunction &MF);
+ void VisitCallPoint(MachineBasicBlock::iterator MI);
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const DebugLoc &DL) const;
+
+ void FindStackOffsets(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ GCMachineCodeAnalysis();
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); }
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+}
+
+const char *LowerIntrinsics::getPassName() const {
+ return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static bool NeedsDefaultLoweringPass(const GCStrategy &C) {
+ // Default lowering is necessary only if read or write barriers have a
+ // default action, or if roots need default initialization. There is no
+ // default action for custom root lowering.
+ return !C.customWriteBarrier() || !C.customReadBarrier() ||
+ C.initializeRoots();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+ GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+ assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isDeclaration() && I->hasGC())
+ MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+ return false;
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+static bool CouldBecomeSafePoint(Instruction *I) {
+ // The natural definition of instructions which could introduce safe points
+ // are:
+ //
+ // - call, invoke (AfterCall, BeforeCall)
+ // - phis (Loops)
+ // - invoke, ret, unwind (Exit)
+ //
+ // However, instructions as seemingly innocuous as arithmetic can become
+ // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+ // it is necessary to take a conservative approach.
+
+ if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) ||
+ isa<LoadInst>(I))
+ return false;
+
+ // llvm.gcroot is safe because it doesn't do anything at runtime.
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ if (Intrinsic::ID IID = F->getIntrinsicID())
+ if (IID == Intrinsic::gcroot)
+ return false;
+
+ return true;
+}
+
+static bool InsertRootInitializers(Function &F, AllocaInst **Roots,
+ unsigned Count) {
+ // Scroll past alloca instructions.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ while (isa<AllocaInst>(IP))
+ ++IP;
+
+ // Search for initializers in the initial BB.
+ SmallPtrSet<AllocaInst *, 16> InitedRoots;
+ for (; !CouldBecomeSafePoint(&*IP); ++IP)
+ if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+ if (AllocaInst *AI =
+ dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+ InitedRoots.insert(AI);
+
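+ // Each root that is not already initialized gets a null store placed right
+ // after its alloca. Illustrative IR only (the identifiers are invented):
+ //   %root = alloca i8*           ; the gcroot slot
+ //   store i8* null, i8** %root   ; inserted by the loop below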
+ // Add root initializers.
+ bool MadeChange = false;
+
+ for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+ if (!InitedRoots.count(*I)) {
+ StoreInst *SI = new StoreInst(
+ ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())),
+ *I);
+ SI->insertAfter(*I);
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use GC.
+ if (!F.hasGC())
+ return false;
+
+ GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+ GCStrategy &S = FI.getStrategy();
+
+ bool MadeChange = false;
+
+ if (NeedsDefaultLoweringPass(S))
+ MadeChange |= PerformDefaultLowering(F, S);
+
+ return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+ bool LowerWr = !S.customWriteBarrier();
+ bool LowerRd = !S.customReadBarrier();
+ bool InitRoots = S.initializeRoots();
+
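+ // When the default lowering applies, the barrier intrinsics collapse into
+ // plain memory operations, roughly as follows (an informal sketch of the
+ // rewrites done in the switch below; operand names are descriptive, not the
+ // actual parameter names):
+ //   llvm.gcwrite(value, obj, slot) -> store value, slot
+ //   llvm.gcread(obj, slot)         -> load slot
+ // The object operand is simply dropped by the default lowering.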
+ SmallVector<AllocaInst *, 32> Roots;
+
+ bool MadeChange = false;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+ Function *F = CI->getCalledFunction();
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::gcwrite:
+ if (LowerWr) {
+ // Replace a write barrier with a simple store.
+ Value *St =
+ new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
+ CI->replaceAllUsesWith(St);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcread:
+ if (LowerRd) {
+ // Replace a read barrier with a simple load.
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Ld->takeName(CI);
+ CI->replaceAllUsesWith(Ld);
+ CI->eraseFromParent();
+ }
+ break;
+ case Intrinsic::gcroot:
+ if (InitRoots) {
+ // Initialize the GC root, but do not delete the intrinsic. The
+ // backend needs the intrinsic to flag the stack slot.
+ Roots.push_back(
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ }
+ break;
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ }
+ }
+ }
+
+ if (Roots.size())
+ MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+ return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
+
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
+
+GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
+
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL) const {
+ MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol();
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+ return Label;
+}
+
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+ // Find the return address (next instruction), too, so as to bracket the call
+ // instruction.
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
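+ // When both kinds of safe points are requested, the call ends up bracketed
+ // by GC_LABELs, roughly like this (informal sketch of the emitted MI):
+ //   GC_LABEL <pre-call symbol>
+ //   <the call instruction>
+ //   GC_LABEL <post-call symbol>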
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
+
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
+}
+
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+ for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE;
+ ++BBI)
+ for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end();
+ MI != ME; ++MI)
+ if (MI->isCall()) {
+ // Do not treat tail or sibling call sites as safe points. This is
+ // legal since any arguments passed to the callee which live in the
+ // remnants of the caller's frame will be owned and updated by the
+ // callee if required.
+ if (MI->isTerminator())
+ continue;
+ VisitCallPoint(MI);
+ }
+}
+
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ assert(TFI && "TargetRegisterInfo not available!");
+
+ for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
+ RI != FI->roots_end();) {
+ // If the root references a dead object, no need to keep it.
+ if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ RI = FI->removeStackRoot(RI);
+ } else {
+ unsigned FrameReg; // FIXME: surely GCRoot ought to store the
+ // register that the offset is from?
+ RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
+ ++RI;
+ }
+ }
+}
+
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ // Quick exit for functions that do not use GC.
+ if (!MF.getFunction()->hasGC())
+ return false;
+
+ FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+ MMI = &getAnalysis<MachineModuleInfo>();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Find the size of the stack frame. There may be no correct static frame
+ // size; if so, we use UINT64_MAX to represent this.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ RegInfo->needsStackRealignment(MF);
+ FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize());
+
+ // Find all safe points.
+ if (FI->getStrategy().needsSafePoints())
+ FindSafePoints(MF);
+
+ // Find the concrete stack offsets for all roots (stack slots)
+ FindStackOffsets(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
new file mode 100644
index 000000000000..554d326942e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -0,0 +1,22 @@
+//===-- GCStrategy.cpp - Garbage Collector Description --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the policy object GCStrategy which describes the
+// behavior of a given garbage collector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+GCStrategy::GCStrategy()
+ : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
+ CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
+ UsesMetadata(false) {}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
new file mode 100644
index 000000000000..231e5ac82bec
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -0,0 +1,30 @@
+//===-- llvm/CodeGen/GlobalISel/GlobalISel.cpp --- GlobalISel ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+// This file implements the common initialization routines for the
+// GlobalISel library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+
+void llvm::initializeGlobalISel(PassRegistry &Registry) {
+}
+
+#else
+
+void llvm::initializeGlobalISel(PassRegistry &Registry) {
+ initializeIRTranslatorPass(Registry);
+ initializeRegBankSelectPass(Registry);
+}
+#endif // LLVM_BUILD_GLOBAL_ISEL
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
new file mode 100644
index 000000000000..b8a960cfac76
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -0,0 +1,164 @@
+//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the IRTranslator class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Target/TargetLowering.h"
+
+#define DEBUG_TYPE "irtranslator"
+
+using namespace llvm;
+
+char IRTranslator::ID = 0;
+INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI",
+ false, false);
+
+IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
+ initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
+}
+
+unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
+ unsigned &ValReg = ValToVReg[&Val];
+ // Check if this is the first time we see Val.
+ if (!ValReg) {
+ // Fill ValRegsSequence with the sequence of registers
+ // we need to concat together to produce the value.
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
+ assert(!Val.getType()->isAggregateType() && "Not yet implemented");
+ unsigned Size = Val.getType()->getPrimitiveSizeInBits();
+ unsigned VReg = MRI->createGenericVirtualRegister(Size);
+ ValReg = VReg;
+ assert(!isa<Constant>(Val) && "Not yet implemented");
+ }
+ return ValReg;
+}
+
+MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
+ MachineBasicBlock *&MBB = BBToMBB[&BB];
+ if (!MBB) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MBB = MF.CreateMachineBasicBlock();
+ MF.push_back(MBB);
+ }
+ return *MBB;
+}
+
+bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) {
+ // Get or create a virtual register for each value.
+ // Unless the value is a Constant => loadimm cst?
+ // or inline constant each time?
+ // Creation of a virtual register needs to have a size.
+ unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0));
+ unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1));
+ unsigned Res = getOrCreateVReg(Inst);
+ MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1);
+ return true;
+}
+
+bool IRTranslator::translateReturn(const Instruction &Inst) {
+ assert(isa<ReturnInst>(Inst) && "Return expected");
+ const Value *Ret = cast<ReturnInst>(Inst).getReturnValue();
+ // The target may mess up with the insertion point, but
+ // this is not important as a return is the last instruction
+ // of the block anyway.
+ return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
+}
+
+bool IRTranslator::translateBr(const Instruction &Inst) {
+ assert(isa<BranchInst>(Inst) && "Branch expected");
+ const BranchInst &BrInst = *cast<BranchInst>(&Inst);
+ if (BrInst.isUnconditional()) {
+ const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0));
+ MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
+ MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB);
+ } else {
+ assert(0 && "Not yet implemented");
+ }
+ // Link successors.
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ for (const BasicBlock *Succ : BrInst.successors())
+ CurBB.addSuccessor(&getOrCreateBB(*Succ));
+ return true;
+}
+
+bool IRTranslator::translate(const Instruction &Inst) {
+ MIRBuilder.setDebugLoc(Inst.getDebugLoc());
+ switch(Inst.getOpcode()) {
+ case Instruction::Add:
+ return translateBinaryOp(TargetOpcode::G_ADD, Inst);
+ case Instruction::Or:
+ return translateBinaryOp(TargetOpcode::G_OR, Inst);
+ case Instruction::Br:
+ return translateBr(Inst);
+ case Instruction::Ret:
+ return translateReturn(Inst);
+
+ default:
+ llvm_unreachable("Opcode not supported");
+ }
+}
+
+
+void IRTranslator::finalize() {
+ // Release the memory used by the different maps we
+ // needed during the translation.
+ ValToVReg.clear();
+ Constants.clear();
+}
+
+bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = *MF.getFunction();
+ if (F.empty())
+ return false;
+ CLI = MF.getSubtarget().getCallLowering();
+ MIRBuilder.setMF(MF);
+ MRI = &MF.getRegInfo();
+ // Setup the arguments.
+ MachineBasicBlock &MBB = getOrCreateBB(F.front());
+ MIRBuilder.setMBB(MBB);
+ SmallVector<unsigned, 8> VRegArgs;
+ for (const Argument &Arg: F.args())
+ VRegArgs.push_back(getOrCreateVReg(Arg));
+ bool Succeeded =
+ CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs);
+ if (!Succeeded)
+ report_fatal_error("Unable to lower arguments");
+
+ for (const BasicBlock &BB: F) {
+ MachineBasicBlock &MBB = getOrCreateBB(BB);
+ // Set the insertion point of all the following translations to
+ // the end of this basic block.
+ MIRBuilder.setMBB(MBB);
+ for (const Instruction &Inst: BB) {
+ bool Succeeded = translate(Inst);
+ if (!Succeeded) {
+ DEBUG(dbgs() << "Cannot translate: " << Inst << '\n');
+ report_fatal_error("Unable to translate instruction");
+ }
+ }
+ }
+
+ // Now that the MachineFrameInfo has been configured, no further changes to
+ // the reserved registers are possible.
+ MRI->freezeReservedRegs(MF);
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
new file mode 100644
index 000000000000..2f19bcf1e68b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -0,0 +1,104 @@
+//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the MachineIRBuilder class.
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+void MachineIRBuilder::setMF(MachineFunction &MF) {
+ this->MF = &MF;
+ this->MBB = nullptr;
+ this->TII = MF.getSubtarget().getInstrInfo();
+ this->DL = DebugLoc();
+ this->MI = nullptr;
+}
+
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) {
+ this->MBB = &MBB;
+ Before = Beginning;
+ assert(&getMF() == MBB.getParent() &&
+ "Basic block is in a different function");
+}
+
+void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) {
+ assert(MI.getParent() && "Instruction is not part of a basic block");
+ setMBB(*MI.getParent());
+ this->MI = &MI;
+ this->Before = Before;
+}
+
+MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() {
+ if (MI) {
+ if (Before)
+ return MI;
+ if (!MI->getNextNode())
+ return getMBB().end();
+ return MI->getNextNode();
+ }
+ return Before ? getMBB().begin() : getMBB().end();
+}
+
+//------------------------------------------------------------------------------
+// Build instruction variants.
+//------------------------------------------------------------------------------
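+// A typical use of these helpers, taken from how the IRTranslator drives the
+// builder (see IRTranslator::translateBinaryOp in this same change), looks
+// roughly like:
+//   MIRBuilder.setMBB(MBB);
+//   MIRBuilder.buildInstr(TargetOpcode::G_ADD, Inst.getType(), Res, Op0, Op1);
+// i.e. pick an insertion point first, then build instructions at it.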
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) {
+ MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode));
+ if (Ty) {
+ assert(isPreISelGenericOpcode(Opcode) &&
+ "Only generic instruction can have a type");
+ NewMI->setType(Ty);
+ } else
+ assert(!isPreISelGenericOpcode(Opcode) &&
+ "Generic instruction must have a type");
+ getMBB().insert(getInsertPt(), NewMI);
+ return NewMI;
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
+ unsigned Op0, unsigned Op1) {
+ return buildInstr(Opcode, nullptr, Res, Op0, Op1);
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ MachineInstr *NewMI = buildInstr(Opcode, Ty);
+ MachineInstrBuilder(getMF(), NewMI)
+ .addReg(Res, RegState::Define)
+ .addReg(Op0)
+ .addReg(Op1);
+ return NewMI;
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
+ unsigned Op0) {
+ MachineInstr *NewMI = buildInstr(Opcode, nullptr);
+ MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0);
+ return NewMI;
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) {
+ return buildInstr(Opcode, nullptr);
+}
+
+MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
+ MachineBasicBlock &BB) {
+ MachineInstr *NewMI = buildInstr(Opcode, Ty);
+ MachineInstrBuilder(getMF(), NewMI).addMBB(&BB);
+ return NewMI;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
new file mode 100644
index 000000000000..419e270c9127
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -0,0 +1,897 @@
+//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegBankSelect class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "regbankselect"
+
+using namespace llvm;
+
+static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
+ cl::desc("Mode of the RegBankSelect pass"), cl::Hidden, cl::Optional,
+ cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
+ "Run the Fast mode (default mapping)"),
+ clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
+ "Use the Greedy mode (best local mapping)"),
+ clEnumValEnd));
+
+char RegBankSelect::ID = 0;
+INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect",
+ "Assign register bank of generic virtual registers",
+ false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(RegBankSelect, "regbankselect",
+ "Assign register bank of generic virtual registers", false,
+ false);
+
+RegBankSelect::RegBankSelect(Mode RunningMode)
+ : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
+ MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+ initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
+ if (RegBankSelectMode.getNumOccurrences() != 0) {
+ OptMode = RegBankSelectMode;
+ if (RegBankSelectMode != RunningMode)
+ DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n");
+ }
+}
+
+void RegBankSelect::init(MachineFunction &MF) {
+ RBI = MF.getSubtarget().getRegBankInfo();
+ assert(RBI && "Cannot work without RegisterBankInfo");
+ MRI = &MF.getRegInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ if (OptMode != Mode::Fast) {
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ } else {
+ MBFI = nullptr;
+ MBPI = nullptr;
+ }
+ MIRBuilder.setMF(MF);
+}
+
+void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ if (OptMode != Mode::Fast) {
+ // We could preserve the information from these two analyses but
+ // the APIs do not allow us to do so yet.
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ }
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegBankSelect::assignmentMatch(
+ unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+ bool &OnlyAssign) const {
+ // By default we assume we will have to repair something.
+ OnlyAssign = false;
+ // Each part of a breakdown needs to end up in a different register.
+ // In other words, the assignment of Reg does not match.
+ if (ValMapping.BreakDown.size() > 1)
+ return false;
+
+ const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+ // Reg is free of assignment, so a simple assignment will make the
+ // register bank match.
+ OnlyAssign = CurRegBank == nullptr;
+ DEBUG(dbgs() << "Does assignment already match: ";
+ if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
+ dbgs() << " against ";
+ assert(DesiredRegBank && "The mapping must be valid");
+ dbgs() << *DesiredRegBank << '\n';);
+ return CurRegBank == DesiredRegBank;
+}
+
+void RegBankSelect::repairReg(
+ MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
+ RegBankSelect::RepairingPlacement &RepairPt,
+ const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
+ assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented");
+ // An empty range of new registers means no repairing.
+ assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+
+ // Assume we are repairing a use and thus, the original reg will be
+ // the source of the repairing.
+ unsigned Src = MO.getReg();
+ unsigned Dst = *NewVRegs.begin();
+
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(Src, Dst);
+
+ assert((RepairPt.getNumInsertPoints() == 1 ||
+ TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+ "We are about to create several defs for Dst");
+
+ // Build the instruction used to repair, then clone it at the right places.
+ MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src);
+ MI->removeFromParent();
+ DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
+ << '\n');
+ // TODO:
+ // Check if MI is legal. if not, we need to legalize all the
+ // instructions we are going to insert.
+ std::unique_ptr<MachineInstr *[]> NewInstrs(
+ new MachineInstr *[RepairPt.getNumInsertPoints()]);
+ bool IsFirst = true;
+ unsigned Idx = 0;
+ for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+ MachineInstr *CurMI;
+ if (IsFirst)
+ CurMI = MI;
+ else
+ CurMI = MIRBuilder.getMF().CloneMachineInstr(MI);
+ InsertPt->insert(*CurMI);
+ NewInstrs[Idx++] = CurMI;
+ IsFirst = false;
+ }
+ // TODO:
+ // Legalize NewInstrs if need be.
+}
+
+uint64_t RegBankSelect::getRepairCost(
+ const MachineOperand &MO,
+ const RegisterBankInfo::ValueMapping &ValMapping) const {
+ assert(MO.isReg() && "We should only repair register operand");
+ assert(!ValMapping.BreakDown.empty() && "Nothing to map??");
+
+ bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1;
+ const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
+ // If MO does not have a register bank, we should have just been
+ // able to set one unless we have to break the value down.
+ assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+ // Def: Val <- NewDefs
+ // Same number of values: copy
+ // Different number: Val = build_sequence Defs1, Defs2, ...
+ // Use: NewSources <- Val.
+ // Same number of values: copy.
+ // Different number: Src1, Src2, ... =
+ // extract_value Val, Src1Begin, Src1Len, Src2Begin, Src2Len, ...
+ // We should remember that this value is available somewhere else to
+ // coalesce the value.
+
+ if (IsSameNumOfValues) {
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(CurRegBank, DesiredRegBank);
+ // TODO: It may be possible to actually avoid the copy.
+ // If we repair something where the source is defined by a copy
+ // and the source of that copy is on the right bank, we can reuse
+ // it for free.
+ // E.g.,
+ // RegToRepair<BankA> = copy AlternativeSrc<BankB>
+ // = op RegToRepair<BankA>
+ // We can simply propagate AlternativeSrc instead of copying RegToRepair
+ // into a new virtual register.
+ // We would also need to propagate this information in the
+ // repairing placement.
+ unsigned Cost =
+ RBI->copyCost(*DesiredRegBank, *CurRegBank,
+ RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
+ // TODO: use a dedicated constant for ImpossibleCost.
+ if (Cost != UINT_MAX)
+ return Cost;
+ assert(false && "Legalization not available yet");
+ // Return the legalization cost of that repairing.
+ }
+ assert(false && "Complex repairing not implemented yet");
+ return 1;
+}
+
+RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
+ MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
+ SmallVectorImpl<RepairingPlacement> &RepairPts) {
+
+ RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
+ MappingCost Cost = MappingCost::ImpossibleCost();
+ SmallVector<RepairingPlacement, 4> LocalRepairPts;
+ for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) {
+ MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost);
+ if (CurCost < Cost) {
+ Cost = CurCost;
+ BestMapping = &CurMapping;
+ RepairPts.clear();
+ for (RepairingPlacement &RepairPt : LocalRepairPts)
+ RepairPts.emplace_back(std::move(RepairPt));
+ }
+ }
+ assert(BestMapping && "No suitable mapping for instruction");
+ return *BestMapping;
+}
+
+void RegBankSelect::tryAvoidingSplit(
+ RegBankSelect::RepairingPlacement &RepairPt, const MachineOperand &MO,
+ const RegisterBankInfo::ValueMapping &ValMapping) const {
+ const MachineInstr &MI = *MO.getParent();
+ assert(RepairPt.hasSplit() && "We should not have to adjust for split");
+ // Splitting should only occur for PHIs or between terminators,
+ // because we only do local repairing.
+ assert((MI.isPHI() || MI.isTerminator()) && "Why do we split?");
+
+ assert(&MI.getOperand(RepairPt.getOpIdx()) == &MO &&
+ "Repairing placement does not match operand");
+
+ // If we need splitting for phis, that means it is because we
+ // could not find an insertion point before the terminators of
+ // the predecessor block for this argument. In other words,
+ // the input value is defined by one of the terminators.
+ assert((!MI.isPHI() || !MO.isDef()) && "Need split for phi def?");
+
+ // We split to repair the use of a phi or a terminator.
+ if (!MO.isDef()) {
+ if (MI.isTerminator()) {
+ assert(&MI != &(*MI.getParent()->getFirstTerminator()) &&
+ "Need to split for the first terminator?!");
+ } else {
+ // For the PHI case, the split may not be actually required.
+ // In the copy case, a phi is already a copy on the incoming edge,
+ // therefore there is no need to split.
+ if (ValMapping.BreakDown.size() == 1)
+ // This is already a copy; there is nothing to do.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
+ }
+ return;
+ }
+
+ // At this point, we need to repair a definition of a terminator.
+
+ // Technically we need to fix the def of MI on all outgoing
+ // edges of MI to keep the repairing local. In other words, we
+ // will create several definitions of the same register. This
+ // does not work for SSA unless that definition is a physical
+ // register.
+ // However, there are other cases where we can get away with
+ // that while still keeping the repairing local.
+ assert(MI.isTerminator() && MO.isDef() &&
+ "This code is for the def of a terminator");
+
+ // Since we use RPO traversal, if we need to repair a definition
+ // this means this definition could be:
+ // 1. Used by PHIs (i.e., this VReg has been visited as part of the
+ // uses of a phi.), or
+ // 2. Part of a target specific instruction (i.e., the target applied
+ // some register class constraints when creating the instruction.)
+ // If the constraints come for #2, the target said that another mapping
+ // is supported so we may just drop them. Indeed, if we do not change
+ // the number of registers holding that value, the uses will get fixed
+ // when we get to them.
+ // Uses in PHIs may already have been processed, though.
+ // If the constraints come for #1, then, those are weak constraints and
+ // no actual uses may rely on them. However, the problem remains mainly
+ // the same as for #2. If the value stays in one register, we could
+ // just switch the register bank of the definition, but we would need to
+ // account for a repairing cost for each phi we silently change.
+ //
+ // In any case, if the value needs to be broken down into several
+ // registers, the repairing is not local anymore as we need to patch
+ // every uses to rebuild the value in just one register.
+ //
+ // To summarize:
+ // - If the value is in a physical register, we can do the split and
+ // fix locally.
+ // Otherwise if the value is in a virtual register:
+ // - If the value remains in one register, we do not have to split
+ // just switching the register bank would do, but we need to account
+ // in the repairing cost all the phi we changed.
+ // - If the value spans several registers, then we cannot do a local
+ // repairing.
+
+ // Check if this is a physical or virtual register.
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // We are going to split every outgoing edge.
+ // Check that this is possible.
+ // FIXME: The machine representation is currently broken
+ // since it also allows several terminators in one basic block.
+ // Because of that we would technically need a way to get
+ // the targets of just one terminator to know which edges
+ // we have to split.
+ // Assert that we do not hit the ill-formed representation.
+
+ // If there are other terminators before that one, some of
+ // the outgoing edges may not be dominated by this definition.
+ assert(&MI == &(*MI.getParent()->getFirstTerminator()) &&
+ "Do not know which outgoing edges are relevant");
+ const MachineInstr *Next = MI.getNextNode();
+ assert((!Next || Next->isUnconditionalBranch()) &&
+ "Do not know where each terminator ends up");
+ if (Next)
+ // If the next terminator uses Reg, this means we have
+ // to split right after MI and thus we need a way to ask
+ // which outgoing edges are affected.
+ assert(!Next->readsRegister(Reg) && "Need to split between terminators");
+ // We will split all the edges and repair there.
+ } else {
+ // This is a virtual register defined by a terminator.
+ if (ValMapping.BreakDown.size() == 1) {
+ // There is nothing to repair, but we may actually lie about
+ // the repairing cost because of the PHIs already processed,
+ // as already stated.
+ // Though the code will be correct.
+ assert(0 && "Repairing cost may not be accurate");
+ } else {
+ // We need to do non-local repairing. Basically, patch all
+ // the uses (i.e., phis) that we already processed.
+ // For now, just say this mapping is not possible.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Impossible);
+ }
+ }
+}
+
+RegBankSelect::MappingCost RegBankSelect::computeMapping(
+ MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+ SmallVectorImpl<RepairingPlacement> &RepairPts,
+ const RegBankSelect::MappingCost *BestCost) {
+ assert((MBFI || !BestCost) && "Costs comparison require MBFI");
+
+ // If mapped with InstrMapping, MI will have the recorded cost.
+ MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
+ bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
+ assert(!Saturated && "Possible mapping saturated the cost");
+ DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
+ DEBUG(dbgs() << "With: " << InstrMapping << '\n');
+ RepairPts.clear();
+ if (BestCost && Cost > *BestCost)
+ return Cost;
+
+ // Moreover, to realize this mapping, the register bank of each operand must
+ // match this mapping. In other words, we may need to locally reassign the
+ // register banks. Account for that repairing cost as well.
+ // In this context, local means in the surrounding of MI.
+ for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ DEBUG(dbgs() << "Opd" << OpIdx);
+ const RegisterBankInfo::ValueMapping &ValMapping =
+ InstrMapping.getOperandMapping(OpIdx);
+ // If Reg is already properly mapped, this is free.
+ bool Assign;
+ if (assignmentMatch(Reg, ValMapping, Assign)) {
+ DEBUG(dbgs() << " is free (match).\n");
+ continue;
+ }
+ if (Assign) {
+ DEBUG(dbgs() << " is free (simple assignment).\n");
+ RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
+ RepairingPlacement::Reassign));
+ continue;
+ }
+
+ // Find the insertion point for the repairing code.
+ RepairPts.emplace_back(
+ RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Insert));
+ RepairingPlacement &RepairPt = RepairPts.back();
+
+ // If we need to split a basic block to materialize this insertion point,
+ // we may give a higher cost to this mapping.
+ // Nevertheless, we may get away with the split, so try that first.
+ if (RepairPt.hasSplit())
+ tryAvoidingSplit(RepairPt, MO, ValMapping);
+
+ // Check that the materialization of the repairing is possible.
+ if (!RepairPt.canMaterialize())
+ return MappingCost::ImpossibleCost();
+
+ // Account for the split cost and repair cost.
+ // Unless the cost is already saturated or we do not care about the cost.
+ if (!BestCost || Saturated)
+ continue;
+
+ // To get accurate information we need MBFI and MBPI.
+ // Thus, if we end up here this information should be here.
+ assert(MBFI && MBPI && "Cost computation requires MBFI and MBPI");
+
+ // FIXME: We will have to rework the repairing cost model.
+ // The repairing cost depends on the register bank that MO has.
+ // However, when we break down the value into different values,
+ // MO may not have a register bank while still needing repairing.
+ // For the fast mode, we don't compute the cost so that is fine,
+ // but still for the repairing code, we will have to make a choice.
+ // For the greedy mode, we should choose greedily what is the best
+ // choice based on the next use of MO.
+
+ // Sums up the repairing cost of MO at each insertion point.
+ uint64_t RepairCost = getRepairCost(MO, ValMapping);
+ // Bias used for splitting: 5%.
+ const uint64_t PercentageForBias = 5;
+ uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
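+ // Illustrative arithmetic only: a RepairCost of 1 gives a Bias of
+ // (1 * 5 + 99) / 100 = 1, and a RepairCost of 40 gives
+ // (40 * 5 + 99) / 100 = 2, i.e. 5% of the repair cost rounded up.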
+ // We should not need more than a couple of instructions to repair
+ // an assignment. In other words, the computation should not
+ // overflow because the repairing cost is free of basic block
+ // frequency.
+ assert(((RepairCost < RepairCost * PercentageForBias) &&
+ (RepairCost * PercentageForBias <
+ RepairCost * PercentageForBias + 99)) &&
+ "Repairing involves more than a billion of instructions?!");
+ for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+ assert(InsertPt->canMaterialize() && "We should not have made it here");
+ // We will apply some basic block frequency, and those frequencies use uint64_t.
+ if (!InsertPt->isSplit())
+ Saturated = Cost.addLocalCost(RepairCost);
+ else {
+ uint64_t CostForInsertPt = RepairCost;
+ // Again we shouldn't overflow here given that
+ // CostForInsertPt is frequency free at this point.
+ assert(CostForInsertPt + Bias > CostForInsertPt &&
+ "Repairing + split bias overflows");
+ CostForInsertPt += Bias;
+ uint64_t PtCost = InsertPt->frequency(*this) * CostForInsertPt;
+ // Check if we just overflowed.
+ if ((Saturated = PtCost < CostForInsertPt))
+ Cost.saturate();
+ else
+ Saturated = Cost.addNonLocalCost(PtCost);
+ }
+
+ // Stop looking into what it takes to repair, this is already
+ // too expensive.
+ if (BestCost && Cost > *BestCost)
+ return Cost;
+
+ // No need to accumulate more cost information.
+ // We need to still gather the repairing information though.
+ if (Saturated)
+ break;
+ }
+ }
+ return Cost;
+}
+
+void RegBankSelect::applyMapping(
+ MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+ SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
+ // OpdMapper will hold all the information needed for the rewriting.
+ RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
+
+ // First, place the repairing code.
+ for (RepairingPlacement &RepairPt : RepairPts) {
+ assert(RepairPt.canMaterialize() &&
+ RepairPt.getKind() != RepairingPlacement::Impossible &&
+ "This mapping is impossible");
+ assert(RepairPt.getKind() != RepairingPlacement::None &&
+ "This should not make its way in the list");
+ unsigned OpIdx = RepairPt.getOpIdx();
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ const RegisterBankInfo::ValueMapping &ValMapping =
+ InstrMapping.getOperandMapping(OpIdx);
+ unsigned BreakDownSize = ValMapping.BreakDown.size();
+ (void)BreakDownSize;
+ unsigned Reg = MO.getReg();
+
+ switch (RepairPt.getKind()) {
+ case RepairingPlacement::Reassign:
+ assert(BreakDownSize == 1 &&
+ "Reassignment should only be for simple mapping");
+ MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
+ break;
+ case RepairingPlacement::Insert:
+ OpdMapper.createVRegs(OpIdx);
+ repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx));
+ break;
+ default:
+ llvm_unreachable("Other kind should not happen");
+ }
+ }
+ // Second, rewrite the instruction.
+ DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
+ RBI->applyMapping(OpdMapper);
+}
+
+void RegBankSelect::assignInstr(MachineInstr &MI) {
+ DEBUG(dbgs() << "Assign: " << MI);
+ // Remember the repairing placement for all the operands.
+ SmallVector<RepairingPlacement, 4> RepairPts;
+
+ RegisterBankInfo::InstructionMapping BestMapping;
+ if (OptMode == RegBankSelect::Mode::Fast) {
+ BestMapping = RBI->getInstrMapping(MI);
+ MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts);
+ (void)DefaultCost;
+ assert(DefaultCost != MappingCost::ImpossibleCost() &&
+ "Default mapping is not suited");
+ } else {
+ RegisterBankInfo::InstructionMappings PossibleMappings =
+ RBI->getInstrPossibleMappings(MI);
+ assert(!PossibleMappings.empty() &&
+ "Do not know how to map this instruction");
+ BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts));
+ }
+ // Make sure the mapping is valid for MI.
+ assert(BestMapping.verify(MI) && "Invalid instruction mapping");
+
+ DEBUG(dbgs() << "Mapping: " << BestMapping << '\n');
+
+ // After this call, MI may not be valid anymore.
+ // Do not use it.
+ applyMapping(MI, BestMapping, RepairPts);
+}
+
+bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+ const Function *F = MF.getFunction();
+ Mode SaveOptMode = OptMode;
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ OptMode = Mode::Fast;
+ init(MF);
+ // Walk the function and assign register banks to all operands.
+ // Use a RPOT to make sure all registers are assigned before we choose
+ // the best mapping of the current instruction.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // Set a sensible insertion point so that subsequent calls to
+ // MIRBuilder insert any repairing code into the right block.
+ MIRBuilder.setMBB(*MBB);
+ for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
+ MII != End;) {
+ // MI might be invalidated by the assignment, so move the
+ // iterator before hand.
+ assignInstr(*MII++);
+ }
+ }
+ OptMode = SaveOptMode;
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// Helper Classes Implementation
+//------------------------------------------------------------------------------
+RegBankSelect::RepairingPlacement::RepairingPlacement(
+ MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
+ RepairingPlacement::RepairingKind Kind)
+ // Default is, we are going to insert code to repair OpIdx.
+ : Kind(Kind),
+ OpIdx(OpIdx),
+ CanMaterialize(Kind != RepairingKind::Impossible),
+ HasSplit(false),
+ P(P) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isReg() && "Trying to repair a non-reg operand");
+
+ if (Kind != RepairingKind::Insert)
+ return;
+
+ // Repairings for definitions happen after MI, uses happen before.
+ bool Before = !MO.isDef();
+
+ // Check if we are done with MI.
+ if (!MI.isPHI() && !MI.isTerminator()) {
+ addInsertPoint(MI, Before);
+ // We are done with the initialization.
+ return;
+ }
+
+ // Now, look for the special cases.
+ if (MI.isPHI()) {
+    // - PHIs must be the first instructions:
+ // * Before, we have to split the related incoming edge.
+ // * After, move the insertion point past the last phi.
+ if (!Before) {
+ MachineBasicBlock::iterator It = MI.getParent()->getFirstNonPHI();
+ if (It != MI.getParent()->end())
+ addInsertPoint(*It, /*Before*/ true);
+ else
+ addInsertPoint(*(--It), /*Before*/ false);
+ return;
+ }
+ // We repair a use of a phi, we may need to split the related edge.
+ MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
+ // Check if we can move the insertion point prior to the
+ // terminators of the predecessor.
+ unsigned Reg = MO.getReg();
+ MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
+ for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
+ if (It->modifiesRegister(Reg, &TRI)) {
+ // We cannot hoist the repairing code in the predecessor.
+ // Split the edge.
+ addInsertPoint(Pred, *MI.getParent());
+ return;
+ }
+ // At this point, we can insert in Pred.
+
+ // - If It is invalid, Pred is empty and we can insert in Pred
+ // wherever we want.
+    // - If It is valid, It points at the last non-terminator of Pred;
+    //   insert right after It.
+ if (It == Pred.end())
+ addInsertPoint(Pred, /*Beginning*/ false);
+ else
+ addInsertPoint(*It, /*Before*/ false);
+ } else {
+ // - Terminators must be the last instructions:
+ // * Before, move the insert point before the first terminator.
+    //    * After, we have to split the outgoing edges.
+ unsigned Reg = MO.getReg();
+ if (Before) {
+ // Check whether Reg is defined by any terminator.
+ MachineBasicBlock::iterator It = MI;
+ for (auto Begin = MI.getParent()->begin();
+ --It != Begin && It->isTerminator();)
+ if (It->modifiesRegister(Reg, &TRI)) {
+ // Insert the repairing code right after the definition.
+ addInsertPoint(*It, /*Before*/ false);
+ return;
+ }
+ addInsertPoint(*It, /*Before*/ true);
+ return;
+ }
+ // Make sure Reg is not redefined by other terminators, otherwise
+ // we do not know how to split.
+ for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
+ ++It != End;)
+ // The machine verifier should reject this kind of code.
+      assert(!It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+    // Split each outgoing edge.
+ MachineBasicBlock &Src = *MI.getParent();
+ for (auto &Succ : Src.successors())
+ addInsertPoint(Src, Succ);
+ }
+}
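+
+// A small sketch of the PHI-use case handled above (block and register
+// names are hypothetical):
+//
+//   bb.2:
+//     %2 = PHI %0, %bb.0, %1, %bb.1
+//
+// Repairing the use of %0 cannot happen right before the PHI, so the
+// insertion point is hoisted into bb.0, after its last non-terminator.
+// If a terminator of bb.0 redefined %0, the edge bb.0 -> bb.2 would be
+// split instead (see EdgeInsertPoint below).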
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineInstr &MI,
+ bool Before) {
+ addInsertPoint(*new InstrInsertPoint(MI, Before));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &MBB,
+ bool Beginning) {
+ addInsertPoint(*new MBBInsertPoint(MBB, Beginning));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &Src,
+ MachineBasicBlock &Dst) {
+ addInsertPoint(*new EdgeInsertPoint(Src, Dst, P));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(
+ RegBankSelect::InsertPoint &Point) {
+ CanMaterialize &= Point.canMaterialize();
+ HasSplit |= Point.isSplit();
+ InsertPoints.emplace_back(&Point);
+}
+
+RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
+ bool Before)
+ : InsertPoint(), Instr(Instr), Before(Before) {
+ // Since we do not support splitting, we do not need to update
+ // liveness and such, so do not do anything with P.
+ assert((!Before || !Instr.isPHI()) &&
+ "Splitting before phis requires more points");
+ assert((!Before || !Instr.getNextNode() || !Instr.getNextNode()->isPHI()) &&
+ "Splitting between phis does not make sense");
+}
+
+void RegBankSelect::InstrInsertPoint::materialize() {
+ if (isSplit()) {
+ // Slice and return the beginning of the new block.
+    // If we need to split between the terminators, we theoretically
+    // need to know where the first and second set of terminators end
+    // to update the successors properly.
+    // Now, in practice, we should have a maximum of 2 branch
+ // instructions; one conditional and one unconditional. Therefore
+ // we know how to update the successor by looking at the target of
+ // the unconditional branch.
+    // If we end up splitting at some point, then we should update
+ // the liveness information and such. I.e., we would need to
+ // access P here.
+ // The machine verifier should actually make sure such cases
+ // cannot happen.
+ llvm_unreachable("Not yet implemented");
+ }
+ // Otherwise the insertion point is just the current or next
+ // instruction depending on Before. I.e., there is nothing to do
+ // here.
+}
+
+bool RegBankSelect::InstrInsertPoint::isSplit() const {
+ // If the insertion point is after a terminator, we need to split.
+ if (!Before)
+ return Instr.isTerminator();
+ // If we insert before an instruction that is after a terminator,
+ // we are still after a terminator.
+ return Instr.getPrevNode() && Instr.getPrevNode()->isTerminator();
+}
+
+uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
+ // Even if we need to split, because we insert between terminators,
+ // this split has actually the same frequency as the instruction.
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
+}
+
+uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ return MBFI->getBlockFreq(&MBB).getFrequency();
+}
+
+void RegBankSelect::EdgeInsertPoint::materialize() {
+ // If we end up repairing twice at the same place before materializing the
+ // insertion point, we may think we have to split an edge twice.
+ // We should have a factory for the insert point such that identical points
+ // are the same instance.
+ assert(Src.isSuccessor(DstOrSplit) && DstOrSplit->isPredecessor(&Src) &&
+ "This point has already been split");
+ MachineBasicBlock *NewBB = Src.SplitCriticalEdge(DstOrSplit, P);
+ assert(NewBB && "Invalid call to materialize");
+ // We reuse the destination block to hold the information of the new block.
+ DstOrSplit = NewBB;
+}
+
+uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
+ const MachineBlockFrequencyInfo *MBFI =
+ P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (!MBFI)
+ return 1;
+ if (WasMaterialized)
+ return MBFI->getBlockFreq(DstOrSplit).getFrequency();
+
+ const MachineBranchProbabilityInfo *MBPI =
+ P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
+ if (!MBPI)
+ return 1;
+ // The basic block will be on the edge.
+ return (MBFI->getBlockFreq(&Src) * MBPI->getEdgeProbability(&Src, DstOrSplit))
+ .getFrequency();
+}
+
+bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
+ // If this is not a critical edge, we should not have used this insert
+  // point. Indeed, the repairing code should have been inserted in the
+  // successor or the predecessor instead.
+ assert(Src.succ_size() > 1 && DstOrSplit->pred_size() > 1 &&
+ "Edge is not critical");
+ return Src.canSplitCriticalEdge(DstOrSplit);
+}
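+
+// For illustration, an edge Src -> Dst is critical when Src has several
+// successors and Dst has several predecessors, e.g. (hypothetical CFG):
+//
+//   bb.0 -> bb.2, bb.3
+//   bb.1 -> bb.2
+//
+// The edge bb.0 -> bb.2 is critical: repairing code cannot go at the end
+// of bb.0 (it would also execute on the path to bb.3) nor at the start of
+// bb.2 (it would also execute on the path from bb.1), hence the split.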
+
+RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+ : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+
+bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
+ // Check if this overflows.
+ if (LocalCost + Cost < LocalCost) {
+ saturate();
+ return true;
+ }
+ LocalCost += Cost;
+ return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::addNonLocalCost(uint64_t Cost) {
+ // Check if this overflows.
+ if (NonLocalCost + Cost < NonLocalCost) {
+ saturate();
+ return true;
+ }
+ NonLocalCost += Cost;
+ return isSaturated();
+}
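+
+// The overflow checks above rely on unsigned wraparound: with 64-bit
+// costs, e.g. LocalCost == UINT64_MAX - 5 and Cost == 10, the sum wraps
+// around to 4, which is smaller than the original LocalCost, so the
+// mapping cost saturates instead of silently becoming cheap.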
+
+bool RegBankSelect::MappingCost::isSaturated() const {
+ return LocalCost == UINT64_MAX - 1 && NonLocalCost == UINT64_MAX &&
+ LocalFreq == UINT64_MAX;
+}
+
+void RegBankSelect::MappingCost::saturate() {
+ *this = ImpossibleCost();
+ --LocalCost;
+}
+
+RegBankSelect::MappingCost RegBankSelect::MappingCost::ImpossibleCost() {
+ return MappingCost(UINT64_MAX, UINT64_MAX, UINT64_MAX);
+}
+
+bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
+ // Sort out the easy cases.
+ if (*this == Cost)
+ return false;
+ // If one is impossible to realize the other is cheaper unless it is
+ // impossible as well.
+ if ((*this == ImpossibleCost()) || (Cost == ImpossibleCost()))
+ return (*this == ImpossibleCost()) < (Cost == ImpossibleCost());
+ // If one is saturated the other is cheaper, unless it is saturated
+ // as well.
+ if (isSaturated() || Cost.isSaturated())
+ return isSaturated() < Cost.isSaturated();
+ // At this point we know both costs hold sensible values.
+
+  // If the two values have different base frequencies, there is not
+  // much we can do but scale everything.
+  // However, if they have the same base frequency, we can avoid the
+  // more complicated computation.
+ uint64_t ThisLocalAdjust;
+ uint64_t OtherLocalAdjust;
+ if (LLVM_LIKELY(LocalFreq == Cost.LocalFreq)) {
+
+ // At this point, we know the local costs are comparable.
+    // Handle the case that does not involve potential overflow first.
+ if (NonLocalCost == Cost.NonLocalCost)
+ // Since the non-local costs do not discriminate on the result,
+ // just compare the local costs.
+ return LocalCost < Cost.LocalCost;
+
+ // The base costs are comparable so we may only keep the relative
+ // value to increase our chances of avoiding overflows.
+ ThisLocalAdjust = 0;
+ OtherLocalAdjust = 0;
+ if (LocalCost < Cost.LocalCost)
+ OtherLocalAdjust = Cost.LocalCost - LocalCost;
+ else
+ ThisLocalAdjust = LocalCost - Cost.LocalCost;
+
+ } else {
+ ThisLocalAdjust = LocalCost;
+ OtherLocalAdjust = Cost.LocalCost;
+ }
+
+ // The non-local costs are comparable, just keep the relative value.
+ uint64_t ThisNonLocalAdjust = 0;
+ uint64_t OtherNonLocalAdjust = 0;
+ if (NonLocalCost < Cost.NonLocalCost)
+ OtherNonLocalAdjust = Cost.NonLocalCost - NonLocalCost;
+ else
+ ThisNonLocalAdjust = NonLocalCost - Cost.NonLocalCost;
+ // Scale everything to make them comparable.
+ uint64_t ThisScaledCost = ThisLocalAdjust * LocalFreq;
+ // Check for overflow on that operation.
+ bool ThisOverflows = ThisLocalAdjust && (ThisScaledCost < ThisLocalAdjust ||
+ ThisScaledCost < LocalFreq);
+ uint64_t OtherScaledCost = OtherLocalAdjust * Cost.LocalFreq;
+ // Check for overflow on the last operation.
+ bool OtherOverflows =
+ OtherLocalAdjust &&
+ (OtherScaledCost < OtherLocalAdjust || OtherScaledCost < Cost.LocalFreq);
+ // Add the non-local costs.
+ ThisOverflows |= ThisNonLocalAdjust &&
+ ThisScaledCost + ThisNonLocalAdjust < ThisNonLocalAdjust;
+ ThisScaledCost += ThisNonLocalAdjust;
+ OtherOverflows |= OtherNonLocalAdjust &&
+ OtherScaledCost + OtherNonLocalAdjust < OtherNonLocalAdjust;
+ OtherScaledCost += OtherNonLocalAdjust;
+ // If both overflows, we cannot compare without additional
+ // precision, e.g., APInt. Just give up on that case.
+ if (ThisOverflows && OtherOverflows)
+ return false;
+ // If one overflows but not the other, we can still compare.
+ if (ThisOverflows || OtherOverflows)
+ return ThisOverflows < OtherOverflows;
+ // Otherwise, just compare the values.
+ return ThisScaledCost < OtherScaledCost;
+}
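+
+// A worked example of the comparison above with equal base frequencies
+// (all numbers hypothetical): this = {LocalCost: 3, NonLocalCost: 10,
+// LocalFreq: 100} versus other = {LocalCost: 5, NonLocalCost: 2,
+// LocalFreq: 100}. The local adjusts are 0 and 2 and the non-local
+// adjusts are 8 and 0, so the scaled costs are 0 * 100 + 8 = 8 and
+// 2 * 100 + 0 = 200, and this mapping is the cheaper one: a smaller
+// local cost in a hot block outweighs a larger non-local cost.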
+
+bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
+ return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost &&
+ LocalFreq == Cost.LocalFreq;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
new file mode 100644
index 000000000000..a911225b5af5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -0,0 +1,107 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBank class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "registerbank"
+
+using namespace llvm;
+
+const unsigned RegisterBank::InvalidID = UINT_MAX;
+
+RegisterBank::RegisterBank() : ID(InvalidID), Name(nullptr), Size(0) {}
+
+bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+ assert(isValid() && "Invalid register bank");
+ assert(ContainedRegClasses.size() == TRI.getNumRegClasses() &&
+ "TRI does not match the initialization process?");
+ for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
+ const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
+
+ if (!covers(RC))
+ continue;
+ // Verify that the register bank covers all the sub classes of the
+ // classes it covers.
+
+ // Use a different (slow in that case) method than
+ // RegisterBankInfo to find the subclasses of RC, to make sure
+ // both agree on the covers.
+ for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) {
+      const TargetRegisterClass &SubRC = *TRI.getRegClass(SubRCId);
+
+ if (!RC.hasSubClassEq(&SubRC))
+ continue;
+
+ // Verify that the Size of the register bank is big enough to cover
+ // all the register classes it covers.
+ assert((getSize() >= SubRC.getSize() * 8) &&
+ "Size is not big enough for all the subclasses!");
+ assert(covers(SubRC) && "Not all subclasses are covered");
+ }
+ }
+ return true;
+}
+
+bool RegisterBank::covers(const TargetRegisterClass &RC) const {
+ assert(isValid() && "RB hasn't been initialized yet");
+ return ContainedRegClasses.test(RC.getID());
+}
+
+bool RegisterBank::isValid() const {
+ return ID != InvalidID && Name != nullptr && Size != 0 &&
+ // A register bank that does not cover anything is useless.
+ !ContainedRegClasses.empty();
+}
+
+bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
+ // There must be only one instance of a given register bank alive
+ // for the whole compilation.
+ // The RegisterBankInfo is supposed to enforce that.
+ assert((OtherRB.getID() != getID() || &OtherRB == this) &&
+ "ID does not uniquely identify a RegisterBank");
+ return &OtherRB == this;
+}
+
+void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+ print(dbgs(), /* IsForDebug */ true, TRI);
+}
+
+void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
+ const TargetRegisterInfo *TRI) const {
+ OS << getName();
+ if (!IsForDebug)
+ return;
+ OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n"
+ << "isValid:" << isValid() << '\n'
+ << "Number of Covered register classes: " << ContainedRegClasses.count()
+ << '\n';
+ // Print all the subclasses if we can.
+  // The register classes may not be properly initialized yet.
+ if (!TRI || ContainedRegClasses.empty())
+ return;
+ assert(ContainedRegClasses.size() == TRI->getNumRegClasses() &&
+ "TRI does not match the initialization process?");
+ bool IsFirst = true;
+ OS << "Covered register classes:\n";
+ for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) {
+ const TargetRegisterClass &RC = *TRI->getRegClass(RCId);
+
+ if (!covers(RC))
+ continue;
+
+ if (!IsFirst)
+ OS << ", ";
+ OS << TRI->getRegClassName(&RC);
+ IsFirst = false;
+ }
+}
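+
+// For reference, dumping a hypothetical bank named "GPR" (ID 0, 32 bits
+// wide, covering three classes) would produce output of the form:
+//
+//   GPR(ID:0, Size:32)
+//   isValid:1
+//   Number of Covered register classes: 3
+//   Covered register classes:
+//   GPR32, GPR32sp, tGPR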
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
new file mode 100644
index 000000000000..ef8e4f6d6851
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -0,0 +1,663 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBankInfo class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include <algorithm> // For std::max.
+
+#define DEBUG_TYPE "registerbankinfo"
+
+using namespace llvm;
+
+const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
+const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
+
+//------------------------------------------------------------------------------
+// RegisterBankInfo implementation.
+//------------------------------------------------------------------------------
+RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks)
+ : NumRegBanks(NumRegBanks) {
+ RegBanks.reset(new RegisterBank[NumRegBanks]);
+}
+
+bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
+ DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+ const RegisterBank &RegBank = getRegBank(Idx);
+ assert(Idx == RegBank.getID() &&
+ "ID does not match the index in the array");
+ dbgs() << "Verify " << RegBank << '\n';
+ assert(RegBank.verify(TRI) && "RegBank is invalid");
+ });
+ return true;
+}
+
+void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) {
+ DEBUG(dbgs() << "Create register bank: " << ID << " with name \"" << Name
+ << "\"\n");
+ RegisterBank &RegBank = getRegBank(ID);
+ assert(RegBank.getID() == RegisterBank::InvalidID &&
+ "A register bank should be created only once");
+ RegBank.ID = ID;
+ RegBank.Name = Name;
+}
+
+void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId,
+ const TargetRegisterInfo &TRI,
+ bool AddTypeMapping) {
+ RegisterBank &RB = getRegBank(ID);
+ unsigned NbOfRegClasses = TRI.getNumRegClasses();
+
+ DEBUG(dbgs() << "Add coverage for: " << RB << '\n');
+
+  // Check if RB is under construction.
+ if (!RB.isValid())
+ RB.ContainedRegClasses.resize(NbOfRegClasses);
+ else if (RB.covers(*TRI.getRegClass(RCId)))
+ // If RB already covers this register class, there is nothing
+ // to do.
+ return;
+
+ BitVector &Covered = RB.ContainedRegClasses;
+ SmallVector<unsigned, 8> WorkList;
+
+ WorkList.push_back(RCId);
+ Covered.set(RCId);
+
+ unsigned &MaxSize = RB.Size;
+ do {
+ unsigned RCId = WorkList.pop_back_val();
+
+ const TargetRegisterClass &CurRC = *TRI.getRegClass(RCId);
+
+ DEBUG(dbgs() << "Examine: " << TRI.getRegClassName(&CurRC)
+ << "(Size*8: " << (CurRC.getSize() * 8) << ")\n");
+
+ // Remember the biggest size in bits.
+ MaxSize = std::max(MaxSize, CurRC.getSize() * 8);
+
+ // If we have been asked to record the type supported by this
+ // register bank, do it now.
+ if (AddTypeMapping)
+ for (MVT::SimpleValueType SVT :
+ make_range(CurRC.vt_begin(), CurRC.vt_end()))
+ recordRegBankForType(getRegBank(ID), SVT);
+
+ // Walk through all sub register classes and push them into the worklist.
+ bool First = true;
+ for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid();
+ ++It) {
+ unsigned SubRCId = It.getID();
+ if (!Covered.test(SubRCId)) {
+ if (First)
+ DEBUG(dbgs() << " Enqueue sub-class: ");
+ DEBUG(dbgs() << TRI.getRegClassName(TRI.getRegClass(SubRCId)) << ", ");
+ WorkList.push_back(SubRCId);
+ // Remember that we saw the sub class.
+ Covered.set(SubRCId);
+ First = false;
+ }
+ }
+ if (!First)
+ DEBUG(dbgs() << '\n');
+
+    // Also push all the register classes that can be accessed via a
+    // subreg index, i.e., its subreg-classes (which are different from
+    // its subclasses).
+ //
+ // Note: It would probably be faster to go the other way around
+ // and have this method add only super classes, since this
+ // information is available in a more efficient way. However, it
+  // feels less natural for the clients of this API, plus we will
+ // TableGen the whole bitset at some point, so compile time for
+ // the initialization is not very important.
+ First = true;
+ for (unsigned SubRCId = 0; SubRCId < NbOfRegClasses; ++SubRCId) {
+ if (Covered.test(SubRCId))
+ continue;
+ bool Pushed = false;
+ const TargetRegisterClass *SubRC = TRI.getRegClass(SubRCId);
+ for (SuperRegClassIterator SuperRCIt(SubRC, &TRI); SuperRCIt.isValid();
+ ++SuperRCIt) {
+ if (Pushed)
+ break;
+ for (BitMaskClassIterator It(SuperRCIt.getMask(), TRI); It.isValid();
+ ++It) {
+ unsigned SuperRCId = It.getID();
+ if (SuperRCId == RCId) {
+ if (First)
+ DEBUG(dbgs() << " Enqueue subreg-class: ");
+ DEBUG(dbgs() << TRI.getRegClassName(SubRC) << ", ");
+ WorkList.push_back(SubRCId);
+ // Remember that we saw the sub class.
+ Covered.set(SubRCId);
+ Pushed = true;
+ First = false;
+ break;
+ }
+ }
+ }
+ }
+ if (!First)
+ DEBUG(dbgs() << '\n');
+ } while (!WorkList.empty());
+}
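+
+// A minimal sketch of how a target's RegisterBankInfo subclass would use
+// the two hooks above (the target name, bank IDs, and register class IDs
+// are hypothetical):
+//
+//   FooRegisterBankInfo::FooRegisterBankInfo(const TargetRegisterInfo &TRI)
+//       : RegisterBankInfo(/*NumRegBanks*/ 2) {
+//     createRegisterBank(/*ID*/ 0, "GPR");
+//     addRegBankCoverage(/*ID*/ 0, Foo::GPR32RegClassID, TRI,
+//                        /*AddTypeMapping*/ true);
+//     createRegisterBank(/*ID*/ 1, "FPR");
+//     addRegBankCoverage(/*ID*/ 1, Foo::FPR64RegClassID, TRI,
+//                        /*AddTypeMapping*/ true);
+//   }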
+
+const RegisterBank *
+RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg));
+
+ assert(Reg && "NoRegister does not have a register bank");
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (RegClassOrBank.is<const RegisterBank *>())
+ return RegClassOrBank.get<const RegisterBank *>();
+ const TargetRegisterClass *RC =
+ RegClassOrBank.get<const TargetRegisterClass *>();
+ if (RC)
+ return &getRegBankFromRegClass(*RC);
+ return nullptr;
+}
+
+const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
+ const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) const {
+ // The mapping of the registers may be available via the
+ // register class constraints.
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI);
+
+ if (!RC)
+ return nullptr;
+
+ const RegisterBank &RegBank = getRegBankFromRegClass(*RC);
+ // Sanity check that the target properly implemented getRegBankFromRegClass.
+ assert(RegBank.covers(*RC) &&
+ "The mapping of the register bank does not make sense");
+ return &RegBank;
+}
+
+RegisterBankInfo::InstructionMapping
+RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
+ RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
+ MI.getNumOperands());
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ // We may need to query the instruction encoding to guess the mapping.
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Before doing anything complicated check if the mapping is not
+ // directly available.
+ bool CompleteMapping = true;
+ // For copies we want to walk over the operands and try to find one
+ // that has a register bank.
+ bool isCopyLike = MI.isCopy() || MI.isPHI();
+ // Remember the register bank for reuse for copy-like instructions.
+ const RegisterBank *RegBank = nullptr;
+ // Remember the size of the register for reuse for copy-like instructions.
+ unsigned RegSize = 0;
+ for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ // The register bank of Reg is just a side effect of the current
+    // execution and, in particular, there is no reason to believe this
+    // is the best default mapping for the current instruction. Keep
+    // it as an alternative register bank if we cannot figure out
+    // anything better.
+ const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+ // For copy-like instruction, we want to reuse the register bank
+ // that is already set on Reg, if any, since those instructions do
+ // not have any constraints.
+ const RegisterBank *CurRegBank = isCopyLike ? AltRegBank : nullptr;
+ if (!CurRegBank) {
+ // If this is a target specific instruction, we can deduce
+ // the register bank from the encoding constraints.
+ CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
+ if (!CurRegBank) {
+ // Check if we can deduce the register bank from the type of
+ // the instruction.
+ Type *MITy = MI.getType();
+ if (MITy)
+ CurRegBank = getRegBankForType(
+ MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy);
+ if (!CurRegBank)
+ // Use the current assigned register bank.
+ // That may not make much sense though.
+ CurRegBank = AltRegBank;
+ if (!CurRegBank) {
+ // All our attempts failed, give up.
+ CompleteMapping = false;
+
+ if (!isCopyLike)
+ // MI does not carry enough information to guess the mapping.
+ return InstructionMapping();
+
+          // For copies, we want to keep iterating to find a register
+ // bank for the other operands if we did not find one yet.
+ if (RegBank)
+ break;
+ continue;
+ }
+ }
+ }
+ RegBank = CurRegBank;
+ RegSize = getSizeInBits(Reg, MRI, TRI);
+ Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank);
+ }
+
+ if (CompleteMapping)
+ return Mapping;
+
+ assert(isCopyLike && "We should have bailed on non-copies at this point");
+  // For copy-like instructions, if none of the operands has a register
+  // bank available, there is nothing we can propagate.
+ if (!RegBank)
+ return InstructionMapping();
+
+ // This is a copy-like instruction.
+ // Propagate RegBank to all operands that do not have a
+ // mapping yet.
+ for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ // Don't assign a mapping for non-reg operands.
+ if (!MO.isReg())
+ continue;
+
+ // If a mapping already exists, do not touch it.
+ if (!static_cast<const InstructionMapping *>(&Mapping)
+ ->getOperandMapping(OpIdx)
+ .BreakDown.empty())
+ continue;
+
+ Mapping.setOperandMapping(OpIdx, RegSize, *RegBank);
+ }
+ return Mapping;
+}
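+
+// For instance (register names hypothetical), given "%1 = COPY %0" where
+// %0 already lives on the GPR bank and %1 has no bank yet, the loop above
+// records GPR and the register size for %0, and the propagation loop then
+// assigns the same bank and size to %1, yielding a complete default
+// mapping for the copy.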
+
+RegisterBankInfo::InstructionMapping
+RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ llvm_unreachable("The target must implement this");
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
+ InstructionMappings PossibleMappings;
+ // Put the default mapping first.
+ PossibleMappings.push_back(getInstrMapping(MI));
+ // Then the alternative mapping, if any.
+ InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
+ for (InstructionMapping &AltMapping : AltMappings)
+ PossibleMappings.emplace_back(std::move(AltMapping));
+#ifndef NDEBUG
+ for (const InstructionMapping &Mapping : PossibleMappings)
+ assert(Mapping.verify(MI) && "Mapping is invalid");
+#endif
+ return PossibleMappings;
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
+ // No alternative for MI.
+ return InstructionMappings();
+}
+
+void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
+ MachineInstr &MI = OpdMapper.getMI();
+ DEBUG(dbgs() << "Applying default-like mapping\n");
+ for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
+ ++OpIdx) {
+ DEBUG(dbgs() << "OpIdx " << OpIdx);
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg()) {
+ DEBUG(dbgs() << " is not a register, nothing to be done\n");
+ continue;
+ }
+ assert(
+ OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() ==
+ 1 &&
+ "This mapping is too complex for this function");
+ iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
+ OpdMapper.getVRegs(OpIdx);
+ if (NewRegs.begin() == NewRegs.end()) {
+ DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
+ continue;
+ }
+ DEBUG(dbgs() << " changed, replace " << MO.getReg());
+ MO.setReg(*NewRegs.begin());
+ DEBUG(dbgs() << " with " << MO.getReg());
+ }
+}
+
+unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
+ const TargetRegisterClass *RC = nullptr;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // The size is not directly available for physical registers.
+ // Instead, we need to access a register class that contains Reg and
+ // get the size of that register class.
+ RC = TRI.getMinimalPhysRegClass(Reg);
+ } else {
+ unsigned RegSize = MRI.getSize(Reg);
+ // If Reg is not a generic register, query the register class to
+ // get its size.
+ if (RegSize)
+ return RegSize;
+ // Since Reg is not a generic register, it must have a register class.
+ RC = MRI.getRegClass(Reg);
+ }
+ assert(RC && "Unable to deduce the register class");
+ return RC->getSize() * 8;
+}
+
+//------------------------------------------------------------------------------
+// Helper classes implementation.
+//------------------------------------------------------------------------------
+void RegisterBankInfo::PartialMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+bool RegisterBankInfo::PartialMapping::verify() const {
+ assert(RegBank && "Register bank not set");
+ assert(Length && "Empty mapping");
+ assert((StartIdx < getHighBitIdx()) && "Overflow, switch to APInt?");
+ // Check if the minimum width fits into RegBank.
+ assert(RegBank->getSize() >= Length && "Register bank too small for Mask");
+ return true;
+}
+
+void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
+ OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = ";
+ if (RegBank)
+ OS << *RegBank;
+ else
+ OS << "nullptr";
+}
+
+bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
+ assert(!BreakDown.empty() && "Value mapped nowhere?!");
+ unsigned OrigValueBitWidth = 0;
+ for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ // Check that each register bank is big enough to hold the partial value:
+ // this check is done by PartialMapping::verify
+ assert(PartMap.verify() && "Partial mapping is invalid");
+ // The original value should completely be mapped.
+ // Thus the maximum accessed index + 1 is the size of the original value.
+ OrigValueBitWidth =
+ std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1);
+ }
+ assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match");
+ APInt ValueMask(OrigValueBitWidth, 0);
+ for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ // Check that the union of the partial mappings covers the whole value,
+ // without overlaps.
+ // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1.
+ APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx,
+ PartMap.getHighBitIdx() + 1);
+ ValueMask ^= PartMapMask;
+ assert((ValueMask & PartMapMask) == PartMapMask &&
+ "Some partial mappings overlap");
+ }
+ assert(ValueMask.isAllOnesValue() && "Value is not fully mapped");
+ return true;
+}
+
+void RegisterBankInfo::ValueMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
+ OS << "#BreakDown: " << BreakDown.size() << " ";
+ bool IsFirst = true;
+ for (const PartialMapping &PartMap : BreakDown) {
+ if (!IsFirst)
+ OS << ", ";
+ OS << '[' << PartMap << ']';
+ IsFirst = false;
+ }
+}
+
+void RegisterBankInfo::InstructionMapping::setOperandMapping(
+ unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) {
+ // Build the value mapping.
+ assert(MaskSize <= RegBank.getSize() && "Register bank is too small");
+
+ // Create the mapping object.
+ getOperandMapping(OpIdx).BreakDown.push_back(
+ PartialMapping(0, MaskSize, RegBank));
+}
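+
+// As an illustration, mapping a 32-bit operand onto a 32-bit wide bank
+// produces a single PartialMapping covering bits [0, 31]. A value split
+// across several banks would instead carry several partial mappings,
+// e.g. [0, 31] and [32, 63] for a 64-bit value, which is exactly the
+// full, non-overlapping coverage that ValueMapping::verify checks for.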
+
+bool RegisterBankInfo::InstructionMapping::verify(
+ const MachineInstr &MI) const {
+ // Check that all the register operands are properly mapped.
+ // Check the constructor invariant.
+ assert(NumOperands == MI.getNumOperands() &&
+ "NumOperands must match, see constructor");
+ assert(MI.getParent() && MI.getParent()->getParent() &&
+ "MI must be connected to a MachineFunction");
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ (void)MF;
+
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ const MachineOperand &MO = MI.getOperand(Idx);
+ const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
+ (void)MOMapping;
+ if (!MO.isReg()) {
+ assert(MOMapping.BreakDown.empty() &&
+ "We should not care about non-reg mapping");
+ continue;
+ }
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ // Register size in bits.
+ // This size must match what the mapping expects.
+ assert(MOMapping.verify(getSizeInBits(
+ Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
+ "Value mapping is invalid");
+ }
+ return true;
+}
+
+void RegisterBankInfo::InstructionMapping::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
+}
+
+void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
+ OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
+
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ const ValueMapping &ValMapping = getOperandMapping(OpIdx);
+ if (OpIdx)
+ OS << ", ";
+ OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}';
+ }
+}
+
+const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1;
+
+RegisterBankInfo::OperandsMapper::OperandsMapper(
+ MachineInstr &MI, const InstructionMapping &InstrMapping,
+ MachineRegisterInfo &MRI)
+ : MRI(MRI), MI(MI), InstrMapping(InstrMapping) {
+ unsigned NumOpds = MI.getNumOperands();
+ OpToNewVRegIdx.reset(new int[NumOpds]);
+ std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds],
+ OperandsMapper::DontKnowIdx);
+ assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
+}
+
+iterator_range<SmallVectorImpl<unsigned>::iterator>
+RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ unsigned NumPartialVal =
+ getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ int StartIdx = OpToNewVRegIdx[OpIdx];
+
+ if (StartIdx == OperandsMapper::DontKnowIdx) {
+ // This is the first time we try to access OpIdx.
+ // Create the cells that will hold all the partial values at the
+ // end of the list of NewVReg.
+ StartIdx = NewVRegs.size();
+ OpToNewVRegIdx[OpIdx] = StartIdx;
+ for (unsigned i = 0; i < NumPartialVal; ++i)
+ NewVRegs.push_back(0);
+ }
+ SmallVectorImpl<unsigned>::iterator End =
+ getNewVRegsEnd(StartIdx, NumPartialVal);
+
+ return make_range(&NewVRegs[StartIdx], End);
+}
+
+SmallVectorImpl<unsigned>::const_iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+ unsigned NumVal) const {
+ return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
+}
+SmallVectorImpl<unsigned>::iterator
+RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
+ unsigned NumVal) {
+ assert((NewVRegs.size() == StartIdx + NumVal ||
+ NewVRegs.size() > StartIdx + NumVal) &&
+ "NewVRegs too small to contain all the partial mapping");
+ return NewVRegs.size() <= StartIdx + NumVal ? NewVRegs.end()
+ : &NewVRegs[StartIdx + NumVal];
+}
+
+void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
+ getVRegsMem(OpIdx);
+ const SmallVectorImpl<PartialMapping> &PartMapList =
+ getInstrMapping().getOperandMapping(OpIdx).BreakDown;
+ SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin();
+ for (unsigned &NewVReg : NewVRegsForOpIdx) {
+ assert(PartMap != PartMapList.end() && "Out-of-bound access");
+ assert(NewVReg == 0 && "Register has already been created");
+ NewVReg = MRI.createGenericVirtualRegister(PartMap->Length);
+ MRI.setRegBank(NewVReg, *PartMap->RegBank);
+ ++PartMap;
+ }
+}
+
+void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
+ unsigned PartialMapIdx,
+ unsigned NewVReg) {
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() >
+ PartialMapIdx &&
+ "Out-of-bound access for partial mapping");
+ // Make sure the memory is initialized for that operand.
+ (void)getVRegsMem(OpIdx);
+ assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 &&
+ "This value is already set");
+ NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg;
+}
+
+iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
+ bool ForDebug) const {
+ (void)ForDebug;
+ assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ int StartIdx = OpToNewVRegIdx[OpIdx];
+
+ if (StartIdx == OperandsMapper::DontKnowIdx)
+ return make_range(NewVRegs.end(), NewVRegs.end());
+
+ unsigned PartMapSize =
+ getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ SmallVectorImpl<unsigned>::const_iterator End =
+ getNewVRegsEnd(StartIdx, PartMapSize);
+ iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
+ make_range(&NewVRegs[StartIdx], End);
+#ifndef NDEBUG
+ for (unsigned VReg : Res)
+ assert((VReg || ForDebug) && "Some registers are uninitialized");
+#endif
+ return Res;
+}
+
+void RegisterBankInfo::OperandsMapper::dump() const {
+ print(dbgs(), true);
+ dbgs() << '\n';
+}
+
+void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
+ bool ForDebug) const {
+ unsigned NumOpds = getMI().getNumOperands();
+ if (ForDebug) {
+ OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n';
+ // Print out the internal state of the index table.
+ OS << "Populated indices (CellNumber, IndexInNewVRegs): ";
+ bool IsFirst = true;
+ for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+ if (OpToNewVRegIdx[Idx] != DontKnowIdx) {
+ if (!IsFirst)
+ OS << ", ";
+ OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')';
+ IsFirst = false;
+ }
+ }
+ OS << '\n';
+ } else
+ OS << "Mapping ID: " << getInstrMapping().getID() << ' ';
+
+ OS << "Operand Mapping: ";
+ // If we have a function, we can pretty print the name of the registers.
+ // Otherwise we will print the raw numbers.
+ const TargetRegisterInfo *TRI =
+ getMI().getParent() && getMI().getParent()->getParent()
+ ? getMI().getParent()->getParent()->getSubtarget().getRegisterInfo()
+ : nullptr;
+ bool IsFirst = true;
+ for (unsigned Idx = 0; Idx != NumOpds; ++Idx) {
+ if (OpToNewVRegIdx[Idx] == DontKnowIdx)
+ continue;
+ if (!IsFirst)
+ OS << ", ";
+ IsFirst = false;
+ OS << '(' << PrintReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
+ bool IsFirstNewVReg = true;
+ for (unsigned VReg : getVRegs(Idx)) {
+ if (!IsFirstNewVReg)
+ OS << ", ";
+ IsFirstNewVReg = false;
+ OS << PrintReg(VReg, TRI);
+ }
+ OS << "])";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
new file mode 100644
index 000000000000..8c760b724d13
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -0,0 +1,605 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals that were merged into the bigger one can be addressed using offsets
+// from the same base pointer (no need for a separate base pointer for each
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of the 3 arrays must be kept in registers, thus
+// this code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+//
+// However, merging globals can have tradeoffs:
+// - it confuses debuggers, tools, and users
+// - it makes linker optimizations less useful (order files, LOHs, ...)
+// - it forces usage of indexed addressing (which isn't necessarily "free")
+// - it can increase register pressure when the uses are disparate enough.
+//
+// We use heuristics to discover the best global grouping we can (cf cl::opts).
+// ===---------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "global-merge"
+
+// FIXME: This is only useful as a last-resort way to disable the pass.
+static cl::opt<bool>
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"),
+ cl::init(true));
+
+static cl::opt<unsigned>
+GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden,
+ cl::desc("Set maximum offset for global merge pass"),
+ cl::init(0));
+
+static cl::opt<bool> GlobalMergeGroupByUse(
+ "global-merge-group-by-use", cl::Hidden,
+ cl::desc("Improve global merge pass to look at uses"), cl::init(true));
+
+static cl::opt<bool> GlobalMergeIgnoreSingleUse(
+ "global-merge-ignore-single-use", cl::Hidden,
+ cl::desc("Improve global merge pass to ignore globals only used alone"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
+// FIXME: this could be a transitional option, and we probably need to remove
+// it once we are sure this optimization always benefits all targets.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+ cl::desc("Enable global merge pass on external linkage"));
+
+STATISTIC(NumMerged, "Number of globals merged");
+namespace {
+ class GlobalMerge : public FunctionPass {
+ const TargetMachine *TM;
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions), see the code that passes in the offset in the ARM backend
+ // for more information.
+ unsigned MaxOffset;
+
+ /// Whether we should try to optimize for size only.
+ /// Currently, this applies a dead simple heuristic: only consider globals
+ /// used in minsize functions for merging.
+ /// FIXME: This could learn about optsize, and be used in the cost model.
+ bool OnlyOptimizeForSize;
+
+ /// Whether we should merge global variables that have external linkage.
+ bool MergeExternalGlobals;
+
+ bool IsMachO;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Merge everything in \p Globals for which the corresponding bit
+ /// in \p GlobalSet is set.
+ bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
+ const BitVector &GlobalSet, Module &M, bool isConst,
+ unsigned AddrSpace) const;
+
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variables marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+    /// Collect every variable marked as "used".
+ void collectUsedGlobalVariables(Module &M);
+
+    /// Keep track of the GlobalVariables that must not be merged away.
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit GlobalMerge()
+ : FunctionPass(ID), TM(nullptr), MaxOffset(GlobalMergeMaxOffset),
+ OnlyOptimizeForSize(false), MergeExternalGlobals(false) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
+ bool OnlyOptimizeForSize, bool MergeExternalGlobals)
+ : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset),
+ OnlyOptimizeForSize(OnlyOptimizeForSize),
+ MergeExternalGlobals(MergeExternalGlobals) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ bool doFinalization(Module &M) override;
+
+ const char *getPassName() const override {
+ return "Merge internal globals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
+INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst, unsigned AddrSpace) const {
+ auto &DL = M.getDataLayout();
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(),
+ [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ return DL.getTypeAllocSize(GV1->getValueType()) <
+ DL.getTypeAllocSize(GV2->getValueType());
+ });
+
+ // If we want to just blindly group all globals together, do so.
+ if (!GlobalMergeGroupByUse) {
+ BitVector AllGlobals(Globals.size());
+ AllGlobals.set();
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // If we want to be smarter, look at all uses of each global, to try to
+ // discover all sets of globals used together, and how many times each of
+ // these sets occurred.
+ //
+ // Keep this reasonably efficient, by having an append-only list of all sets
+ // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
+ // code (currently, a Function) to the set of globals seen so far that are
+ // used together in that unit (GlobalUsesByFunction).
+ //
+  // When we look at the Nth global, we know that any new set is either:
+ // - the singleton set {N}, containing this global only, or
+ // - the union of {N} and a previously-discovered set, containing some
+ // combination of the previous N-1 globals.
+ // Using that knowledge, when looking at the Nth global, we can keep:
+ // - a reference to the singleton set {N} (CurGVOnlySetIdx)
+ // - a list mapping each previous set to its union with {N} (EncounteredUGS),
+ // if it actually occurs.
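+
+  // A worked example (globals and functions hypothetical): with globals A,
+  // B, and C, where f() uses A and B while g() uses only A, scanning A
+  // maps both f and g to the set {A}; scanning B moves f onto a new set
+  // {A, B} and leaves g on {A}; C is used nowhere, so the discovered sets
+  // end up being {A} (for g) and {A, B} (for f).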
+
+ // We keep track of the sets of globals used together "close enough".
+ struct UsedGlobalSet {
+ UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
+ BitVector Globals;
+ unsigned UsageCount;
+ };
+
+ // Each set is unique in UsedGlobalSets.
+ std::vector<UsedGlobalSet> UsedGlobalSets;
+
+ // Avoid repeating the create-global-set pattern.
+ auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
+ UsedGlobalSets.emplace_back(Globals.size());
+ return UsedGlobalSets.back();
+ };
+
+ // The first set is the empty set.
+ CreateGlobalSet().UsageCount = 0;
+
+ // We define "close enough" to be "in the same function".
+ // FIXME: Grouping uses by function is way too aggressive, so we should have
+ // a better metric for distance between uses.
+ // The obvious alternative would be to group by BasicBlock, but that's in
+  // turn too conservative.
+ // Anything in between wouldn't be trivial to compute, so just stick with
+ // per-function grouping.
+
+ // The value type is an index into UsedGlobalSets.
+ // The default (0) conveniently points to the empty set.
+ DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
+
+ // Now, look at each merge-eligible global in turn.
+
+ // Keep track of the sets we already encountered to which we added the
+ // current global.
+ // Each element matches the same-index element in UsedGlobalSets.
+ // This lets us efficiently tell whether a set has already been expanded to
+ // include the current global.
+ std::vector<size_t> EncounteredUGS;
+
+ for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
+ GlobalVariable *GV = Globals[GI];
+
+ // Reset the encountered sets for this global...
+ std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
+ // ...and grow it in case we created new sets for the previous global.
+ EncounteredUGS.resize(UsedGlobalSets.size());
+
+ // We might need to create a set that only consists of the current global.
+ // Keep track of its index into UsedGlobalSets.
+ size_t CurGVOnlySetIdx = 0;
+
+ // For each global, look at all its Uses.
+ for (auto &U : GV->uses()) {
+ // This Use might be a ConstantExpr. We're interested in Instruction
+ // users, so look through ConstantExpr...
+ Use *UI, *UE;
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
+ if (CE->use_empty())
+ continue;
+ UI = &*CE->use_begin();
+ UE = nullptr;
+ } else if (isa<Instruction>(U.getUser())) {
+ UI = &U;
+ UE = UI->getNext();
+ } else {
+ continue;
+ }
+
+ // ...to iterate on all the instruction users of the global.
+ // Note that we iterate on Uses and not on Users to be able to getNext().
+ for (; UI != UE; UI = UI->getNext()) {
+ Instruction *I = dyn_cast<Instruction>(UI->getUser());
+ if (!I)
+ continue;
+
+ Function *ParentFn = I->getParent()->getParent();
+
+ // If we're only optimizing for size, ignore non-minsize functions.
+ if (OnlyOptimizeForSize && !ParentFn->optForMinSize())
+ continue;
+
+ size_t UGSIdx = GlobalUsesByFunction[ParentFn];
+
+        // If this is the first global this function uses, map it to the set
+ // consisting of this global only.
+ if (!UGSIdx) {
+ // If that set doesn't exist yet, create it.
+ if (!CurGVOnlySetIdx) {
+ CurGVOnlySetIdx = UsedGlobalSets.size();
+ CreateGlobalSet().Globals.set(GI);
+ } else {
+ ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount;
+ }
+
+ GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx;
+ continue;
+ }
+
+        // If this function's current set already contains this global, just
+        // increment the counter.
+ if (UsedGlobalSets[UGSIdx].Globals.test(GI)) {
+ ++UsedGlobalSets[UGSIdx].UsageCount;
+ continue;
+ }
+
+ // If not, the previous set wasn't actually used in this function.
+ --UsedGlobalSets[UGSIdx].UsageCount;
+
+ // If we already expanded the previous set to include this global, just
+ // reuse that expanded set.
+ if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) {
+ ++UsedGlobalSets[ExpandedIdx].UsageCount;
+ GlobalUsesByFunction[ParentFn] = ExpandedIdx;
+ continue;
+ }
+
+ // If not, create a new set consisting of the union of the previous set
+ // and this global. Mark it as encountered, so we can reuse it later.
+ GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] =
+ UsedGlobalSets.size();
+
+ UsedGlobalSet &NewUGS = CreateGlobalSet();
+ NewUGS.Globals.set(GI);
+ NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals;
+ }
+ }
+ }
+
+ // Now we found a bunch of sets of globals used together. We accumulated
+  // the number of times we encountered the sets (i.e., the number of
+  // functions that use that exact set of globals).
+ //
+ // Multiply that by the size of the set to give us a crude profitability
+ // metric.
+ std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
+ [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+ return UGS1.Globals.count() * UGS1.UsageCount <
+ UGS2.Globals.count() * UGS2.UsageCount;
+ });
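+
+  // E.g. (numbers hypothetical): a set of 2 globals seen in 10 functions
+  // scores 2 * 10 = 20, while a set of 4 globals seen in 3 functions
+  // scores 4 * 3 = 12, so the pair-heavy set is considered more profitable
+  // and, since the sort is ascending, ends up towards the back of
+  // UsedGlobalSets, where the loops below start picking.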
+
+ // We can choose to merge all globals together, but ignore globals never used
+ // with another global. This catches the obviously non-profitable cases of
+ // having a single global, but is aggressive enough for any other case.
+ if (GlobalMergeIgnoreSingleUse) {
+ BitVector AllGlobals(Globals.size());
+ for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
+ const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ if (UGS.UsageCount == 0)
+ continue;
+ if (UGS.Globals.count() > 1)
+ AllGlobals |= UGS.Globals;
+ }
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // Starting from the sets with the best (=biggest) profitability, find a
+ // good combination.
+ // The ideal (and expensive) solution can only be found by trying all
+ // combinations, looking for the one with the best profitability.
+ // Don't be smart about it, and just pick the first compatible combination,
+ // starting with the sets with the best profitability.
+ BitVector PickedGlobals(Globals.size());
+ bool Changed = false;
+
+ for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
+ const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ if (UGS.UsageCount == 0)
+ continue;
+ if (PickedGlobals.anyCommon(UGS.Globals))
+ continue;
+ PickedGlobals |= UGS.Globals;
+ // If the set only contains one global, there's no point in merging.
+ // Ignore the global for inclusion in other sets though, so keep it in
+ // PickedGlobals.
+ if (UGS.Globals.count() < 2)
+ continue;
+ Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace);
+ }
+
+ return Changed;
+}
+
+bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
+ const BitVector &GlobalSet, Module &M, bool isConst,
+ unsigned AddrSpace) const {
+ assert(Globals.size() > 1);
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ auto &DL = M.getDataLayout();
+
+ DEBUG(dbgs() << " Trying to merge set, starts with #"
+ << GlobalSet.find_first() << "\n");
+
+ ssize_t i = GlobalSet.find_first();
+ while (i != -1) {
+ ssize_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+
+ for (j = i; j != -1; j = GlobalSet.find_next(j)) {
+ Type *Ty = Globals[j]->getValueType();
+ MergedSize += DL.getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+
+ GlobalVariable *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit,
+ "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+
+ for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
+ GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+ std::string Name = Globals[k]->getName();
+
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, idx),
+ };
+ Constant *GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+
+ // When the linkage is not internal we must emit an alias for the original
+ // variable name as it may be accessed from another object. On non-Mach-O
+ // we can also emit an alias for internal linkage as it's safe to do so.
+ // It's not safe on Mach-O as the alias (and thus the portion of the
+ // MergedGlobals variable) may be dead stripped at link time.
+ if (Linkage != GlobalValue::InternalLinkage || !IsMachO) {
+ GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M);
+ }
+
+ NumMerged++;
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
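+
+// For reference, a hypothetical llvm.used entry that the loop above would
+// pick up looks like:
+//   @llvm.used = appending global [1 x i8*]
+//                [i8* bitcast (i32* @keep_me to i8*)], section "llvm.metadata"
+// stripPointerCasts() peels off the bitcast so @keep_me lands in
+// MustKeepGlobalVariables.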
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the invoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
+
+bool GlobalMerge::doInitialization(Module &M) {
+ if (!EnableGlobalMerge)
+ return false;
+
+ IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO();
+
+ auto &DL = M.getDataLayout();
+ DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
+ BSSGlobals;
+ bool Changed = false;
+ setMustKeepGlobalVariables(M);
+
+ // Collect all mergeable globals, bucketed by address space.
+ for (auto &GV : M.globals()) {
+ // Merge is safe for "normal" internal or external globals only
+ if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
+ continue;
+
+ if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
+ !GV.hasInternalLinkage())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(GV.getType());
+ assert(PT && "Global variable is not a pointer!");
+
+ unsigned AddressSpace = PT->getAddressSpace();
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = DL.getPreferredAlignment(&GV);
+ Type *Ty = GV.getValueType();
+ if (Alignment > DL.getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (GV.getName().startswith("llvm.") ||
+ GV.getName().startswith(".llvm."))
+ continue;
+
+ // Ignore all "required" globals:
+ if (isMustKeepGlobalVariable(&GV))
+ continue;
+
+ if (DL.getTypeAllocSize(Ty) < MaxOffset) {
+ if (TM &&
+ TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal())
+ BSSGlobals[AddressSpace].push_back(&GV);
+ else if (GV.isConstant())
+ ConstGlobals[AddressSpace].push_back(&GV);
+ else
+ Globals[AddressSpace].push_back(&GV);
+ }
+ }
+
+ for (auto &P : Globals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first);
+
+ for (auto &P : BSSGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, false, P.first);
+
+ if (EnableGlobalMergeOnConst)
+ for (auto &P : ConstGlobals)
+ if (P.second.size() > 1)
+ Changed |= doMerge(P.second, M, true, P.first);
+
+ return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+bool GlobalMerge::doFinalization(Module &M) {
+ MustKeepGlobalVariables.clear();
+ return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
+ bool OnlyOptimizeForSize,
+ bool MergeExternalByDefault) {
+ bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
+ return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
+}
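+
+// A target would typically schedule this from its TargetPassConfig, e.g.
+// (with hypothetical values):
+//   addPass(createGlobalMergePass(TM, 4095, /*OnlyOptimizeForSize=*/false,
+//                                 /*MergeExternalByDefault=*/true));
+// where the offset is the largest immediate reachable by the target's
+// addressing mode.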
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 000000000000..d225162860c2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1844 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass, which
+// tries to convert conditional branches into predicated instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ifcvt"
+
+// Hidden options for help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+ cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple, "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle, "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
+STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
+
+namespace {
+ class IfConverter : public MachineFunctionPass {
+ enum IfcvtKind {
+ ICNotClassfied, // BB data valid, but not classified.
+ ICSimpleFalse, // Same as ICSimple, but on the false path.
+ ICSimple, // BB is entry of a one-split, no-rejoin sub-CFG.
+ ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition.
+ ICTriangleRev, // Same as ICTriangle, but true path rev condition.
+ ICTriangleFalse, // Same as ICTriangle, but on the false path.
+ ICTriangle, // BB is entry of a triangle sub-CFG.
+ ICDiamond // BB is entry of a diamond sub-CFG.
+ };
+
+ /// BBInfo - One per MachineBasicBlock, this is used to cache the results
+ /// of the if-conversion feasibility analysis. This includes results from
+ /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), the block's
+ /// classification, the common tail block of its successors (if it's a
+ /// diamond shape), its size, whether it's predicable, and whether any
+ /// instruction can clobber the 'would-be' predicate.
+ ///
+ /// IsDone - True if BB is not to be considered for ifcvt.
+ /// IsBeingAnalyzed - True if BB is currently being analyzed.
+ /// IsAnalyzed - True if BB has been analyzed (info is still valid).
+ /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed.
+ /// IsBrAnalyzable - True if analyzeBranch() returns false, i.e. the
+ /// block's terminators could be analyzed.
+ /// HasFallThrough - True if BB may fall through to the following BB.
+ /// IsUnpredicable - True if BB is known to be unpredicable.
+ /// ClobbersPred - True if BB could modify predicates (e.g. has
+ /// cmp, call, etc.)
+ /// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Extra cost for instructions that are slower when predicated.
+ /// BB - Corresponding MachineBasicBlock.
+ /// TrueBB / FalseBB- See analyzeBranch().
+ /// BrCond - Conditions for end of block conditional branches.
+ /// Predicate - Predicate used in the BB.
+ struct BBInfo {
+ bool IsDone : 1;
+ bool IsBeingAnalyzed : 1;
+ bool IsAnalyzed : 1;
+ bool IsEnqueued : 1;
+ bool IsBrAnalyzable : 1;
+ bool HasFallThrough : 1;
+ bool IsUnpredicable : 1;
+ bool CannotBeCopied : 1;
+ bool ClobbersPred : 1;
+ unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *TrueBB;
+ MachineBasicBlock *FalseBB;
+ SmallVector<MachineOperand, 4> BrCond;
+ SmallVector<MachineOperand, 4> Predicate;
+ BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+ IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+ HasFallThrough(false), IsUnpredicable(false),
+ CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+ ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
+ FalseBB(nullptr) {}
+ };
+
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// BBI - Corresponding BBInfo.
+ /// Kind - Type of block. See IfcvtKind.
+ /// NeedSubsumption - True if the to-be-predicated BB has already been
+ /// predicated.
+ /// NumDups - Number of instructions that would be duplicated due
+ /// to this if-conversion. (For diamonds, the number of
+ /// identical instructions at the beginnings of both
+ /// paths).
+ /// NumDups2 - For diamonds, the number of identical instructions
+ /// at the ends of both paths.
+ struct IfcvtToken {
+ BBInfo &BBI;
+ IfcvtKind Kind;
+ bool NeedSubsumption;
+ unsigned NumDups;
+ unsigned NumDups2;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+ : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ };
+
+ /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+ /// basic block number.
+ std::vector<BBInfo> BBAnalysis;
+ TargetSchedModel SchedModel;
+
+ const TargetLoweringBase *TLI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineBranchProbabilityInfo *MBPI;
+ MachineRegisterInfo *MRI;
+
+ LivePhysRegs Redefs;
+ LivePhysRegs DontKill;
+
+ bool PreRegAlloc;
+ bool MadeChange;
+ int FnNum;
+ std::function<bool(const Function &)> PredicateFtor;
+
+ public:
+ static char ID;
+ IfConverter(std::function<bool(const Function &)> Ftor = nullptr)
+ : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ private:
+ bool ReverseBranchCondition(BBInfo &BBI);
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ BranchProbability Prediction) const;
+ bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ BranchProbability Prediction) const;
+ bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const;
+ void ScanInstructions(BBInfo &BBI);
+ void AnalyzeBlock(MachineBasicBlock *MBB,
+ std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
+ bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+ bool isTriangle = false, bool RevBranch = false);
+ void AnalyzeBlocks(MachineFunction &MF,
+ std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
+ void InvalidatePreds(MachineBasicBlock *BB);
+ void RemoveExtraEdges(BBInfo &BBI);
+ bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2);
+ void PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> *LaterRedefs = nullptr);
+ void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr = false);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ BranchProbability Prediction) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ BranchProbability Prediction) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction);
+ }
+
+ // blockAlwaysFallThrough - Block ends without a terminator.
+ bool blockAlwaysFallThrough(BBInfo &BBI) const {
+ return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
+ }
+
+ // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
+ const std::unique_ptr<IfcvtToken> &C2) {
+ int Incr1 = (C1->Kind == ICDiamond)
+ ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+ int Incr2 = (C2->Kind == ICDiamond)
+ ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+ if (Incr1 > Incr2)
+ return true;
+ else if (Incr1 == Incr2) {
+ // Favors subsumption.
+ if (!C1->NeedSubsumption && C2->NeedSubsumption)
+ return true;
+ else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+ // Favors diamond over triangle, etc.
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+ return true;
+ else if (C1->Kind == C2->Kind)
+ return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+ }
+ }
+ return false;
+ }
+ };
+
+ char IfConverter::ID = 0;
+}
+
+char &llvm::IfConverterID = IfConverter::ID;
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()) ||
+ (PredicateFtor && !PredicateFtor(*MF.getFunction())))
+ return false;
+
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TLI = ST.getTargetLowering();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
+
+ if (!TII) return false;
+
+ PreRegAlloc = MRI->isSSA();
+
+ bool BFChange = false;
+ if (!PreRegAlloc) {
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true, false, MBFI, *MBPI);
+ BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+ << MF.getName() << "\'");
+
+ if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+ DEBUG(dbgs() << " skipped\n");
+ return false;
+ }
+ DEBUG(dbgs() << "\n");
+
+ MF.RenumberBlocks();
+ BBAnalysis.resize(MF.getNumBlockIDs());
+
+ std::vector<std::unique_ptr<IfcvtToken>> Tokens;
+ MadeChange = false;
+ unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+ NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+ // Do an initial analysis for each basic block and find all the potential
+ // candidates to perform if-conversion.
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
+ while (!Tokens.empty()) {
+ std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back());
+ Tokens.pop_back();
+ BBInfo &BBI = Token->BBI;
+ IfcvtKind Kind = Token->Kind;
+ unsigned NumDups = Token->NumDups;
+ unsigned NumDups2 = Token->NumDups2;
+
+ // If the block has been evicted out of the queue or it has already been
+ // marked dead (due to it being predicated), then skip it.
+ if (BBI.IsDone)
+ BBI.IsEnqueued = false;
+ if (!BBI.IsEnqueued)
+ continue;
+
+ BBI.IsEnqueued = false;
+
+ bool RetVal = false;
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected!");
+ case ICSimple:
+ case ICSimpleFalse: {
+ bool isFalse = Kind == ICSimpleFalse;
+ if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
+ << "): BB#" << BBI.BB->getNumber() << " ("
+ << ((Kind == ICSimpleFalse)
+ ? BBI.FalseBB->getNumber()
+ : BBI.TrueBB->getNumber()) << ") ");
+ RetVal = IfConvertSimple(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
+ }
+ break;
+ }
+ case ICTriangle:
+ case ICTriangleRev:
+ case ICTriangleFalse:
+ case ICTriangleFRev: {
+ bool isFalse = Kind == ICTriangleFalse;
+ bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev);
+ if (DisableTriangle && !isFalse && !isRev) break;
+ if (DisableTriangleR && !isFalse && isRev) break;
+ if (DisableTriangleF && isFalse && !isRev) break;
+ if (DisableTriangleFR && isFalse && isRev) break;
+ DEBUG(dbgs() << "Ifcvt (Triangle");
+ if (isFalse)
+ DEBUG(dbgs() << " false");
+ if (isRev)
+ DEBUG(dbgs() << " rev");
+ DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertTriangle(BBI, Kind);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) {
+ if (isFalse) {
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
+ } else {
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
+ }
+ }
+ break;
+ }
+ case ICDiamond: {
+ if (DisableDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumDiamonds;
+ break;
+ }
+ }
+
+ Change |= RetVal;
+
+ NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev +
+ NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+ if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit)
+ break;
+ }
+
+ if (!Change)
+ break;
+ MadeChange |= Change;
+ }
+
+ Tokens.clear();
+ BBAnalysis.clear();
+
+ if (MadeChange && IfCvtBranchFold) {
+ BranchFolder BF(false, false, MBFI, *MBPI);
+ BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+ }
+
+ MadeChange |= BFChange;
+ return MadeChange;
+}
+
+/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
+/// its 'true' successor.
+static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *TrueBB) {
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ E = BB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ if (SuccBB != TrueBB)
+ return SuccBB;
+ }
+ return nullptr;
+}
+
+/// ReverseBranchCondition - Reverse the condition of the end of the block
+/// branch. Swap block's 'true' and 'false' successors.
+bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
+ if (!TII->ReverseBranchCondition(BBI.BrCond)) {
+ TII->RemoveBranch(*BBI.BB);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ std::swap(BBI.TrueBB, BBI.FalseBB);
+ return true;
+ }
+ return false;
+}
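+
+// As a hypothetical example on an ARM-like target, a block ending in
+//   BEQ %bb.true;  B %bb.false
+// becomes
+//   BNE %bb.false; B %bb.true
+// with TrueBB and FalseBB swapped in the BBInfo so later queries still see a
+// consistent picture.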
+
+/// getNextBlock - Returns the next block in the function blocks ordering. If
+/// it is the end, returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
+ MachineFunction::iterator I = BB->getIterator();
+ MachineFunction::iterator E = BB->getParent()->end();
+ if (++I == E)
+ return nullptr;
+ return &*I;
+}
+
+/// ValidSimple - Returns true if the 'true' block (along with its
+/// predecessor) forms a valid simple shape for ifcvt. It also returns, in
+/// 'Dups', the number of instructions that the ifcvt would need to duplicate
+/// if performed.
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ BranchProbability Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.IsBrAnalyzable)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied ||
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction))
+ return false;
+ Dups = TrueBBI.NonPredSize;
+ }
+
+ return true;
+}
+
+/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid triangle shape for ifcvt.
+/// If 'FalseBranch' is true, it checks whether the 'true' block's false branch
+/// branches to the 'false' block rather than the other way around. It also
+/// returns, in 'Dups', the number of instructions that the ifcvt would need
+/// to duplicate if performed.
+bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ bool FalseBranch, unsigned &Dups,
+ BranchProbability Prediction) const {
+ Dups = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
+ return false;
+
+ if (TrueBBI.BB->pred_size() > 1) {
+ if (TrueBBI.CannotBeCopied)
+ return false;
+
+ unsigned Size = TrueBBI.NonPredSize;
+ if (TrueBBI.IsBrAnalyzable) {
+ if (TrueBBI.TrueBB && TrueBBI.BrCond.empty())
+ // Ends with an unconditional branch. It will be removed.
+ --Size;
+ else {
+ MachineBasicBlock *FExit = FalseBranch
+ ? TrueBBI.TrueBB : TrueBBI.FalseBB;
+ if (FExit)
+ // Require a conditional branch
+ ++Size;
+ }
+ }
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
+ return false;
+ Dups = Size;
+ }
+
+ MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB;
+ if (!TExit && blockAlwaysFallThrough(TrueBBI)) {
+ MachineFunction::iterator I = TrueBBI.BB->getIterator();
+ if (++I == TrueBBI.BB->getParent()->end())
+ return false;
+ TExit = &*I;
+ }
+ return TExit && TExit == FalseBBI.BB;
+}
+
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
+ (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ return false;
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ while (TIB != TIE && FIB != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
+ break;
+ }
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
+ break;
+ }
+ if (!TIB->isIdenticalTo(*FIB))
+ break;
+ ++Dups1;
+ ++TIB;
+ ++FIB;
+ }
+
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ // If both blocks are returning, don't skip the branches, since they will
+ // likely both be identical return instructions. In such cases the return
+ // can be left unpredicated.
+ // Check for already containing all of the block.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+ --TIE;
+ --FIE;
+ if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) {
+ while (TIE != TIB && TIE->isBranch())
+ --TIE;
+ while (FIE != FIB && FIE->isBranch())
+ --FIE;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
+ break;
+ }
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
+ break;
+ }
+ if (!TIE->isIdenticalTo(*FIE))
+ break;
+ ++Dups2;
+ --TIE;
+ --FIE;
+ }
+
+ return true;
+}
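+
+// Sketch of the counting above for a hypothetical diamond whose arms are
+//   TBB: a; b; x; c; br TailBB      FBB: a; b; y; c; br TailBB
+// (a, b, c identical on both sides): Dups1 becomes 2 for the shared prefix
+// "a; b", the trailing branches are skipped, and Dups2 becomes 1 for the
+// shared "c". dbg_value instructions are never counted on either side.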
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless that instruction is the last one
+/// in the block.
+void IfConverter::ScanInstructions(BBInfo &BBI) {
+ if (BBI.IsDone)
+ return;
+
+ bool AlreadyPredicated = !BBI.Predicate.empty();
+ // First analyze the end of BB branches.
+ BBI.TrueBB = BBI.FalseBB = nullptr;
+ BBI.BrCond.clear();
+ BBI.IsBrAnalyzable =
+ !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
+
+ if (BBI.BrCond.size()) {
+ // No false branch. This BB must end with a conditional branch and a
+ // fallthrough.
+ if (!BBI.FalseBB)
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+
+ // Then scan all the instructions.
+ BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
+ BBI.ClobbersPred = false;
+ for (auto &MI : *BBI.BB) {
+ if (MI.isDebugValue())
+ continue;
+
+ // It's unsafe to duplicate convergent instructions in this context, so set
+ // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the
+ // following CFG, which is subject to our "simple" transformation.
+ //
+ // BB0 // if (c1) goto BB1; else goto BB2;
+ // / \
+ // BB1 |
+ // | BB2 // if (c2) goto TBB; else goto FBB;
+ // | / |
+ // | / |
+ // TBB |
+ // | |
+ // | FBB
+ // |
+ // exit
+ //
+ // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd
+ // be unconditional, and in BB2, they'd be predicated upon c2), and suppose
+ // TBB contains a convergent instruction. This is safe iff doing so does
+ // not add a control-flow dependency to the convergent instruction -- i.e.,
+ // it's safe iff the set of control flows that leads us to the convergent
+ // instruction does not get smaller after the transformation.
+ //
+ // Originally we executed TBB if c1 || c2. After the transformation, there
+ // are two copies of TBB's instructions. We get to the first if c1, and we
+ // get to the second if !c1 && c2.
+ //
+ // There are clearly fewer ways to satisfy the condition "c1" than
+ // "c1 || c2". Since we've shrunk the set of control flows which lead to
+ // our convergent instruction, the transformation is unsafe.
+ if (MI.isNotDuplicable() || MI.isConvergent())
+ BBI.CannotBeCopied = true;
+
+ bool isPredicated = TII->isPredicated(MI);
+ bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
+
+ // A conditional branch is not predicable, but it may be eliminated.
+ if (isCondBr)
+ continue;
+
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = TII->getPredicationCost(MI);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ if (BBI.ClobbersPred && !isPredicated) {
+ // A predicate-modifying instruction should end the block (except for
+ // already predicated instructions and end of block branches).
+ // The predicate may have been modified, so the subsequent (currently)
+ // unpredicated instructions cannot be correctly predicated.
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
+ // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+ // still potentially predicable.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(MI, PredDefs))
+ BBI.ClobbersPred = true;
+
+ if (!TII->isPredicable(MI)) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+ }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+ SmallVectorImpl<MachineOperand> &Pred,
+ bool isTriangle, bool RevBranch) {
+ // If the block is dead or unpredicable, then it cannot be predicated.
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return false;
+
+ // If it is already predicated but we couldn't analyze its terminator, the
+ // latter might fall through, but we can't determine where to.
+ // Conservatively avoid if-converting again.
+ if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+ return false;
+
+ // If it is already predicated, check if the new predicate subsumes
+ // its predicate.
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
+ return false;
+
+ if (BBI.BrCond.size()) {
+ if (!isTriangle)
+ return false;
+
+ // Test predicate subsumption.
+ SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (RevBranch) {
+ if (TII->ReverseBranchCondition(Cond))
+ return false;
+ }
+ if (TII->ReverseBranchCondition(RevPred) ||
+ !TII->SubsumesPredicate(Cond, RevPred))
+ return false;
+ }
+
+ return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+void IfConverter::AnalyzeBlock(
+ MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
+ struct BBState {
+ BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
+ MachineBasicBlock *MBB;
+
+ /// This flag is true if MBB's successors have been analyzed.
+ bool SuccsAnalyzed;
+ };
+
+ // Push MBB to the stack.
+ SmallVector<BBState, 16> BBStack(1, MBB);
+
+ while (!BBStack.empty()) {
+ BBState &State = BBStack.back();
+ MachineBasicBlock *BB = State.MBB;
+ BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+ if (!State.SuccsAnalyzed) {
+ if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) {
+ BBStack.pop_back();
+ continue;
+ }
+
+ BBI.BB = BB;
+ BBI.IsBeingAnalyzed = true;
+
+ ScanInstructions(BBI);
+
+ // Unanalyzable, or ends with a fallthrough or an unconditional branch, or
+ // is not considered for ifcvt anymore.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ // Do not ifcvt if either path is a back edge to the entry block.
+ if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ // Push the False and True blocks to the stack.
+ State.SuccsAnalyzed = true;
+ BBStack.push_back(BBI.FalseBB);
+ BBStack.push_back(BBI.TrueBB);
+ continue;
+ }
+
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+
+ if (TrueBBI.IsDone && FalseBBI.IsDone) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ continue;
+ }
+
+ SmallVector<MachineOperand, 4>
+ RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+ unsigned Dups = 0;
+ unsigned Dups2 = 0;
+ bool TNeedSub = !TrueBBI.Predicate.empty();
+ bool FNeedSub = !FalseBBI.Predicate.empty();
+ bool Enqueued = false;
+
+ BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
+
+ if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
+ // Triangle:
+ // EBB
+ // | \_
+ // | |
+ // | TBB
+ // | /
+ // FBB
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
+ // Simple (split, no rejoin):
+ // EBB
+ // | \_
+ // | |
+ // | TBB---> exit
+ // |
+ // FBB
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (CanRevCond) {
+ // Try the other path...
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
+ FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
+ Enqueued = true;
+ }
+
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
+ FeasibilityAnalysis(FalseBBI, RevCond)) {
+ Tokens.push_back(
+ llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
+ Enqueued = true;
+ }
+ }
+
+ BBI.IsEnqueued = Enqueued;
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ BBStack.pop_back();
+ }
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(
+ MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
+ for (auto &BB : MF)
+ AnalyzeBlock(&BB, Tokens);
+
+ // Sort to favor more complex ifcvt scheme.
+ std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
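+
+// Since candidates are consumed from the back of the vector, this ordering
+// roughly means diamonds are attempted before triangles and simples; among
+// simples and triangles, candidates that would duplicate fewer instructions
+// are tried first, with ties broken by subsumption, kind, and block number.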
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+ MachineFunction::iterator PI = BB->getIterator();
+ MachineFunction::iterator I = std::next(PI);
+ MachineFunction::iterator TI = ToBB->getIterator();
+ MachineFunction::iterator E = BB->getParent()->end();
+ while (I != TI) {
+ // Check isSuccessor to avoid case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(&*I))
+ return false;
+ PI = I++;
+ }
+ return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it will be re-analyzed
+/// to determine if it can be if-converted. If a predecessor is already
+/// enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+ for (const auto &Predecessor : BB->predecessors()) {
+ BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
+ if (PBBI.IsDone || PBBI.BB == BB)
+ continue;
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
+ SmallVector<MachineOperand, 0> NoCond;
+ TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond))
+ BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
+/// values defined in MI which are not live/used by MI.
+static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
+ Redefs.stepForward(MI, Clobbers);
+
+ // Now add the implicit uses for each of the clobbered values.
+ for (auto Reg : Clobbers) {
+ // FIXME: Const cast here is nasty, but better than making StepForward
+ // take a mutable instruction instead of const.
+ MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second);
+ MachineInstr *OpMI = Op.getParent();
+ MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
+ if (Op.isRegMask()) {
+ // First handle regmasks. They clobber any entries in the mask which
+ // means that we need a def for those registers.
+ MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+
+ // We also need to add an implicit def of this register for the later
+ // use to read from.
+ // For the register allocator to have allocated a register clobbered
+ // by the call which is used later, it must be the case that
+ // the call doesn't return.
+ MIB.addReg(Reg.first, RegState::Implicit | RegState::Define);
+ continue;
+ }
+ assert(Op.isReg() && "Register operand required");
+ if (Op.isDead()) {
+ // If we found a dead def, but it needs to be live, then remove the dead
+ // flag.
+ if (Redefs.contains(Op.getReg()))
+ Op.setIsDead(false);
+ }
+ MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+ }
+}
+
+/**
+ * Remove kill flags from operands with registers in the @p DontKill set.
+ */
+static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
+ for (MIBundleOperands O(MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->isKill())
+ continue;
+ if (DontKill.contains(O->getReg()))
+ O->setIsKill(false);
+ }
+}
+
+/**
+ * Walks a range of machine instructions and removes kill flags for registers
+ * in the @p DontKill set.
+ */
+static void RemoveKills(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ const LivePhysRegs &DontKill,
+ const MCRegisterInfo &MCRI) {
+ for ( ; I != E; ++I)
+ RemoveKills(*I, DontKill);
+}
+
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICSimpleFalse)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (CvtBBI->BB->hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
+ if (Kind == ICSimpleFalse)
+ if (TII->ReverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.init(TRI);
+ Redefs.addLiveIns(*CvtBBI->BB);
+ Redefs.addLiveIns(*NextBBI->BB);
+
+ // Compute a set of registers which must not be killed by instructions in
+ // BB1: This is everything live-in to BB2.
+ DontKill.init(TRI);
+ DontKill.addLiveIns(*NextBBI->BB);
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
+ BBI.BB->removeSuccessor(CvtBBI->BB, true);
+ } else {
+ RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Merge converted block into entry block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI);
+ }
+
+ bool IterIfcvt = true;
+ if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ // Now ifcvt'd block will look like this:
+ // BB:
+ // ...
+ // t, f = cmp
+ // if t op
+ // b BBf
+ //
+ // We cannot further ifcvt this block because the unconditional branch
+ // will have to be predicated on the new condition, which will not be
+ // available if the cmp executes.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ BBInfo *CvtBBI = &TrueBBI;
+ BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
+
+ SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ std::swap(CvtBBI, NextBBI);
+
+ if (CvtBBI->IsDone ||
+ (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ // Something has changed. It's no longer safe to predicate this block.
+ BBI.IsAnalyzed = false;
+ CvtBBI->IsAnalyzed = false;
+ return false;
+ }
+
+ if (CvtBBI->BB->hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
+ if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+ if (TII->ReverseBranchCondition(Cond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+ if (ReverseBranchCondition(*CvtBBI)) {
+ // BB has been changed, modify its predecessors (except for this
+ // one) so they don't get ifcvt'ed based on bad intel.
+ for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+ E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PBB = *PI;
+ if (PBB == BBI.BB)
+ continue;
+ BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+ if (PBBI.IsEnqueued) {
+ PBBI.IsAnalyzed = false;
+ PBBI.IsEnqueued = false;
+ }
+ }
+ }
+ }
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.init(TRI);
+ Redefs.addLiveIns(*CvtBBI->BB);
+ Redefs.addLiveIns(*NextBBI->BB);
+
+ DontKill.clear();
+
+ bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
+ BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
+
+ if (HasEarlyExit) {
+ // Get probabilities before modifying CvtBBI->BB and BBI.BB.
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB);
+ CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB);
+ BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB);
+ BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB);
+ }
+
+ if (CvtBBI->BB->pred_size() > 1) {
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Copy instructions in the true block, predicate them, and add them to
+ // the entry block.
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
+ BBI.BB->removeSuccessor(CvtBBI->BB, true);
+ } else {
+ // Predicate the 'true' block after removing its branch.
+ CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+
+ // Now merge the entry of the triangle with the true block.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ MergeBlocks(BBI, *CvtBBI, false);
+ }
+
+ // If 'true' block has a 'false' successor, add an exit branch to it.
+ if (HasEarlyExit) {
+ SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+ CvtBBI->BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+
+ // Update the edge probability for both CvtBBI->FalseBB and NextBBI.
+ // NewNext = New_Prob(BBI.BB, NextBBI->BB) =
+ // Prob(BBI.BB, NextBBI->BB) +
+ // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB)
+ // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
+ // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB)
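+ // For instance, with hypothetical probabilities Prob(BBI.BB, NextBBI->BB)
+ // = 0.3, Prob(BBI.BB, CvtBBI->BB) = 0.7, Prob(CvtBBI->BB, NextBBI->BB) =
+ // 0.6 and Prob(CvtBBI->BB, CvtBBI->FalseBB) = 0.4, this gives
+ // NewNext = 0.3 + 0.7 * 0.6 = 0.72 and NewFalse = 0.7 * 0.4 = 0.28.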
+ auto NewTrueBB = getNextBlock(BBI.BB);
+ auto NewNext = BBNext + BBCvt * CvtNext;
+ auto NewTrueBBIter =
+ std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB);
+ if (NewTrueBBIter != BBI.BB->succ_end())
+ BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
+
+ auto NewFalse = BBCvt * CvtFalse;
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
+ BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
+ }
+
+ // Merge in the 'false' block if the 'false' block has no other
+ // predecessors. Otherwise, add an unconditional branch to 'false'.
+ bool FalseBBDead = false;
+ bool IterIfcvt = true;
+ bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ if (!isFallThrough) {
+ // Only merge them if the true block does not fallthrough to the false
+ // block. By not merging them, we make it possible to iteratively
+ // ifcvt the blocks.
+ if (!HasEarlyExit &&
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
+ !NextBBI->BB->hasAddressTaken()) {
+ MergeBlocks(BBI, *NextBBI);
+ FalseBBDead = true;
+ } else {
+ InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ BBI.HasFallThrough = false;
+ }
+ // Mixed predicated and unpredicated code. This cannot be iteratively
+ // predicated.
+ IterIfcvt = false;
+ }
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info. BB can be iteratively if-converted.
+ if (!IterIfcvt)
+ BBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+ CvtBBI->IsDone = true;
+ if (FalseBBDead)
+ NextBBI->IsDone = true;
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (TrueBBI.IsDone || FalseBBI.IsDone ||
+ TrueBBI.BB->pred_size() > 1 ||
+ FalseBBI.BB->pred_size() > 1) {
+ // Something has changed. It's no longer safe to predicate these blocks.
+ BBI.IsAnalyzed = false;
+ TrueBBI.IsAnalyzed = false;
+ FalseBBI.IsAnalyzed = false;
+ return false;
+ }
+
+ if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
+ // Conservatively abort if-conversion if either BB has its address taken.
+ return false;
+
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
+ BBInfo *BBI1 = &TrueBBI;
+ BBInfo *BBI2 = &FalseBBI;
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ if (TII->ReverseBranchCondition(RevCond))
+ llvm_unreachable("Unable to reverse branch condition!");
+ SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+ SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+ // Figure out the more profitable ordering.
+ bool DoSwap = false;
+ if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ DoSwap = true;
+ else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+ DoSwap = true;
+ }
+ if (DoSwap) {
+ std::swap(BBI1, BBI2);
+ std::swap(Cond1, Cond2);
+ }
+
+ // Remove the conditional branch from entry to the blocks.
+ BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ Redefs.init(TRI);
+ Redefs.addLiveIns(*BBI1->BB);
+
+ // Remove the duplicated instructions at the beginnings of both paths.
+ // Skip dbg_value instructions
+ MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr();
+ MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr();
+ BBI1->NonPredSize -= NumDups1;
+ BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ while (NumDups1 != 0) {
+ ++DI2;
+ if (!DI2->isDebugValue())
+ --NumDups1;
+ }
+
+ // Compute a set of registers which must not be killed by instructions in BB1:
+ // This is everything used+live in BB2 after the duplicated instructions. We
+ // can compute this set by simulating liveness backwards from the end of BB2.
+ DontKill.init(TRI);
+ for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(),
+ E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) {
+ DontKill.stepBackward(*I);
+ }
+
+ for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
+ ++I) {
+ SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
+ Redefs.stepForward(*I, IgnoredClobbers);
+ }
+ BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+ BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+ // Remove branch from the 'true' block, unless it was not analyzable.
+ // Non-analyzable branches need to be preserved, since in such cases,
+ // the CFG structure is not an actual diamond (the join block may not
+ // be present).
+ if (BBI1->IsBrAnalyzable)
+ BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ // Remove duplicated instructions.
+ DI1 = BBI1->BB->end();
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI1 != BBI1->BB->begin());
+ --DI1;
+ // skip dbg_value instructions
+ if (!DI1->isDebugValue())
+ ++i;
+ }
+ BBI1->BB->erase(DI1, BBI1->BB->end());
+
+ // Kill flags in the true block for registers living into the false block
+ // must be removed.
+ RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
+
+ // Remove 'false' block branch (unless it was not analyzable), and find
+ // the last instruction to predicate.
+ if (BBI2->IsBrAnalyzable)
+ BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+ DI2 = BBI2->BB->end();
+ while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert (DI2 != BBI2->BB->begin());
+ --DI2;
+ // skip dbg_value instructions
+ if (!DI2->isDebugValue())
+ --NumDups2;
+ }
+
+ // Remember which registers would later be defined by the false block.
+ // This allows us not to predicate instructions in the true block that would
+ // later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<unsigned, 4> RedefsByFalse;
+ SmallSet<unsigned, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+ for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+ if (FI->isDebugValue())
+ continue;
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = FI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+ // These are defined before control flow reaches the 'false' instructions.
+ // They cannot be modified by the 'true' instructions.
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ ExtUses.insert(*SubRegs);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!ExtUses.count(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RedefsByFalse.insert(*SubRegs);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse);
+
+ // After predicating BBI1, if there is a predicated terminator in BBI1 and
+ // a non-predicated in BBI2, then we don't want to predicate the one from
+ // BBI2. The reason is that if we merged these blocks, we would end up with
+ // two predicated terminators in the same block.
+ if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) {
+ MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator();
+ MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator();
+ if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) &&
+ BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))
+ --DI2;
+ }
+
+ // Predicate the 'false' block.
+ PredicateBlock(*BBI2, DI2, *Cond2);
+
+ // Merge the true block into the entry of the diamond.
+ MergeBlocks(BBI, *BBI1, TailBB == nullptr);
+ MergeBlocks(BBI, *BBI2, TailBB == nullptr);
+
+ // If the if-converted block falls through or unconditionally branches into
+ // the tail block, and the tail block does not have other predecessors, then
+ // fold the tail block in as well. Otherwise, unless it falls through to the
+ // tail, add an unconditional branch to it.
+ if (TailBB) {
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
+ bool CanMergeTail = !TailBBI.HasFallThrough &&
+ !TailBBI.BB->hasAddressTaken();
+ // The if-converted block can still have a predicated terminator
+ // (e.g. a predicated return). If that is the case, we cannot merge
+ // it with the tail block.
+ MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator();
+ if (TI != BBI.BB->end() && TII->isPredicated(*TI))
+ CanMergeTail = false;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
+ MergeBlocks(BBI, TailBBI);
+ TailBBI.IsDone = true;
+ } else {
+ BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
+ InsertUncondBranch(BBI.BB, TailBB, TII);
+ BBI.HasFallThrough = false;
+ }
+ }
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB, true);
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
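+/// MaySpeculate - Return true if MI is safe to leave unpredicated: it must be
+/// safe to move, and every register it defines must appear in LaterRedefs,
+/// i.e. the 'false' block redefines it anyway.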
+static bool MaySpeculate(const MachineInstr &MI,
+ SmallSet<unsigned, 4> &LaterRedefs) {
+ bool SawStore = true;
+ if (!MI.isSafeToMove(nullptr, SawStore))
+ return false;
+
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+ MachineBasicBlock::iterator E,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != nullptr;
+ for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) {
+ if (I.isDebugValue() || TII->isPredicated(I))
+ continue;
+ // It may be possible not to predicate an instruction if it's the 'true'
+ // side of a diamond and the 'false' side may re-define the instruction's
+ // defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
+ if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << I << "!\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(I, Redefs);
+ }
+
+ BBI.Predicate.append(Cond.begin(), Cond.end());
+
+ BBI.IsAnalyzed = false;
+ BBI.NonPredSize = 0;
+
+ ++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool IgnoreBr) {
+ MachineFunction &MF = *ToBBI.BB->getParent();
+
+ for (auto &I : *FromBBI.BB) {
+ // Do not copy the end of the block branches.
+ if (IgnoreBr && I.isBranch())
+ break;
+
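+    // Clone the instruction into the destination block and account for its
+    // size and predication cost in ToBBI's estimates.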
+ MachineInstr *MI = MF.CloneMachineInstr(&I);
+ ToBBI.BB->insert(ToBBI.BB->end(), MI);
+ ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = TII->getPredicationCost(I);
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
+
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+ if (!TII->PredicateInstruction(*MI, Cond)) {
+#ifndef NDEBUG
+ dbgs() << "Unable to predicate " << I << "!\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(*MI, Redefs);
+
+ // Some kill flags may not be correct anymore.
+ if (!DontKill.empty())
+ RemoveKills(*MI, DontKill);
+ }
+
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
+ }
+
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ ToBBI.Predicate.append(Cond.begin(), Cond.end());
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.IsAnalyzed = false;
+
+ ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ assert(!FromBBI.BB->hasAddressTaken() &&
+ "Removing a BB whose address is taken!");
+
+ // In case FromBBI.BB contains terminators (e.g. return instruction),
+ // first move the non-terminator instructions, then the terminators.
+ MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator();
+ MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
+ ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI);
+
+  // If FromBB has a non-predicated terminator, we should copy it at the end.
+ if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI))
+ ToTI = ToBBI.BB->end();
+ ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end());
+
+ // Force normalizing the successors' probabilities of ToBBI.BB to convert all
+ // unknown probabilities into known ones.
+ // FIXME: This usage is too tricky and in the future we would like to
+ // eliminate all unknown probabilities in MBB.
+ ToBBI.BB->normalizeSuccProbs();
+
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+ // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
+ // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+ auto To2FromProb = BranchProbability::getZero();
+ if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
+ To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
+ // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+ // edge probability being merged to other edges when this edge is removed
+ // later.
+ ToBBI.BB->setSuccProbability(
+ std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
+ BranchProbability::getZero());
+ }
+
+ for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FromSuccs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+
+ auto NewProb = BranchProbability::getZero();
+ if (AddEdges) {
+ // Calculate the edge probability for the edge from ToBBI.BB to Succ,
+ // which is a portion of the edge probability from FromBBI.BB to Succ. The
+      // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+      // FromBBI.BB is a successor of ToBBI.BB; see the comment below for the
+      // exception).
+ NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+
+ // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
+ // only happens when if-converting a diamond CFG and FromBBI.BB is the
+ // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
+ // could just use the probabilities on FromBBI.BB's out-edges when adding
+ // new successors.
+ if (!To2FromProb.isZero())
+ NewProb *= To2FromProb;
+ }
+
+ FromBBI.BB->removeSuccessor(Succ);
+
+ if (AddEdges) {
+ // If the edge from ToBBI.BB to Succ already exists, update the
+ // probability of this edge by adding NewProb to it. An example is shown
+ // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+ // don't have to set C as A's successor as it already is. We only need to
+ // update the edge probability on A->C. Note that B will not be
+ // immediately removed from A's successors. It is possible that B->D is
+ // not removed either if D is a fallthrough of B. Later the edge A->D
+ // (generated here) and B->D will be combined into one edge. To maintain
+ // correct edge probability of this combined edge, we need to set the edge
+ // probability of A->B to zero, which is already done above. The edge
+ // probability on A->D is calculated by scaling the original probability
+ // on A->B by the probability of B->D.
+ //
+ // Before ifcvt: After ifcvt (assume B->D is kept):
+ //
+ // A A
+ // /| /|\
+ // / B / B|
+ // | /| | ||
+ // |/ | | |/
+ // C D C D
+ //
+ if (ToBBI.BB->isSuccessor(Succ))
+ ToBBI.BB->setSuccProbability(
+ std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ),
+ MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
+ else
+ ToBBI.BB->addSuccessor(Succ, NewProb);
+ }
+ }
+
+ // Now FromBBI always falls through to the next block!
+ if (NBB && !FromBBI.BB->isSuccessor(NBB))
+ FromBBI.BB->addSuccessor(NBB);
+
+ // Normalize the probabilities of ToBBI.BB's successors with all adjustment
+ // we've done above.
+ ToBBI.BB->normalizeSuccProbs();
+
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ FromBBI.Predicate.clear();
+
+ ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+ FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
+
+ ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+ ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+ ToBBI.IsAnalyzed = false;
+ FromBBI.IsAnalyzed = false;
+}
+
+FunctionPass *
+llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
+ return new IfConverter(std::move(Ftor));
+}
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
new file mode 100644
index 000000000000..31d6bd0b6dc6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -0,0 +1,576 @@
+//===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass turns explicit null checks of the form
+//
+// test %r10, %r10
+// je throw_npe
+// movl (%r10), %esi
+// ...
+//
+// to
+//
+// faulting_load_op("movl (%r10), %esi", throw_npe)
+// ...
+//
+// With the help of a runtime that understands the .fault_maps section,
+// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
+// a page fault.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<int> PageSize("imp-null-check-page-size",
+ cl::desc("The page size of the target in bytes"),
+ cl::init(4096));
+
+#define DEBUG_TYPE "implicit-null-checks"
+
+STATISTIC(NumImplicitNullChecks,
+ "Number of explicit null checks made implicit");
+
+namespace {
+
+class ImplicitNullChecks : public MachineFunctionPass {
+ /// Represents one null check that can be made implicit.
+ class NullCheck {
+ // The memory operation the null check can be folded into.
+ MachineInstr *MemOperation;
+
+ // The instruction actually doing the null check (Ptr != 0).
+ MachineInstr *CheckOperation;
+
+ // The block the check resides in.
+ MachineBasicBlock *CheckBlock;
+
+ // The block branched to if the pointer is non-null.
+ MachineBasicBlock *NotNullSucc;
+
+ // The block branched to if the pointer is null.
+ MachineBasicBlock *NullSucc;
+
+    // If this is non-null, then MemOperation has a dependency on this
+ // instruction; and it needs to be hoisted to execute before MemOperation.
+ MachineInstr *OnlyDependency;
+
+ public:
+ explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
+ MachineBasicBlock *checkBlock,
+ MachineBasicBlock *notNullSucc,
+ MachineBasicBlock *nullSucc,
+ MachineInstr *onlyDependency)
+ : MemOperation(memOperation), CheckOperation(checkOperation),
+ CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc),
+ OnlyDependency(onlyDependency) {}
+
+ MachineInstr *getMemOperation() const { return MemOperation; }
+
+ MachineInstr *getCheckOperation() const { return CheckOperation; }
+
+ MachineBasicBlock *getCheckBlock() const { return CheckBlock; }
+
+ MachineBasicBlock *getNotNullSucc() const { return NotNullSucc; }
+
+ MachineBasicBlock *getNullSucc() const { return NullSucc; }
+
+ MachineInstr *getOnlyDependency() const { return OnlyDependency; }
+ };
+
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MachineModuleInfo *MMI = nullptr;
+
+ bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
+ SmallVectorImpl<NullCheck> &NullCheckList);
+ MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB);
+ void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
+
+public:
+ static char ID;
+
+ ImplicitNullChecks() : MachineFunctionPass(ID) {
+ initializeImplicitNullChecksPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+};
+
+/// \brief Detect re-ordering hazards and dependencies.
+///
+/// This class keeps track of defs and uses, and can be queried if a given
+/// machine instruction can be re-ordered from after the machine instructions
+/// seen so far to before them.
+class HazardDetector {
+ static MachineInstr *getUnknownMI() {
+ return DenseMapInfo<MachineInstr *>::getTombstoneKey();
+ }
+
+ // Maps physical registers to the instruction defining them. If there has
+  // been more than one def of a specific register, that register is mapped to
+ // getUnknownMI().
+ DenseMap<unsigned, MachineInstr *> RegDefs;
+ DenseSet<unsigned> RegUses;
+ const TargetRegisterInfo &TRI;
+ bool hasSeenClobber;
+ AliasAnalysis &AA;
+
+public:
+ explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
+ : TRI(TRI), hasSeenClobber(false), AA(AA) {}
+
+ /// \brief Make a note of \p MI for later queries to isSafeToHoist.
+ ///
+ /// May clobber this HazardDetector instance. \see isClobbered.
+ void rememberInstruction(MachineInstr *MI);
+
+ /// \brief Return true if it is safe to hoist \p MI from after all the
+ /// instructions seen so far (via rememberInstruction) to before it. If \p MI
+ /// has one and only one transitive dependency, set \p Dependency to that
+ /// instruction. If there are more dependencies, return false.
+ bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency);
+
+ /// \brief Return true if this instance of HazardDetector has been clobbered
+ /// (i.e. has no more useful information).
+ ///
+  /// A HazardDetector is clobbered when it sees a construct it cannot
+ /// understand, and it would have to return a conservative answer for all
+ /// future queries. Having a separate clobbered state lets the client code
+ /// bail early, without making queries about all of the future instructions
+ /// (which would have returned the most conservative answer anyway).
+ ///
+ /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector
+ /// is an error.
+ bool isClobbered() { return hasSeenClobber; }
+};
+}
+
+
+void HazardDetector::rememberInstruction(MachineInstr *MI) {
+ assert(!isClobbered() &&
+ "Don't add instructions to a clobbered hazard detector");
+
+ if (MI->mayStore() || MI->hasUnmodeledSideEffects()) {
+ hasSeenClobber = true;
+ return;
+ }
+
+ for (auto *MMO : MI->memoperands()) {
+ // Right now we don't want to worry about LLVM's memory model.
+ if (!MMO->isUnordered()) {
+ hasSeenClobber = true;
+ return;
+ }
+ }
+
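+  // Record this instruction's register defs and uses so that later
+  // isSafeToHoist queries can detect dependency hazards against them.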
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+
+ if (MO.isDef()) {
+ auto It = RegDefs.find(MO.getReg());
+ if (It == RegDefs.end())
+ RegDefs.insert({MO.getReg(), MI});
+ else {
+ assert(It->second && "Found null MI?");
+ It->second = getUnknownMI();
+ }
+ } else
+ RegUses.insert(MO.getReg());
+ }
+}
+
+bool HazardDetector::isSafeToHoist(MachineInstr *MI,
+ MachineInstr *&Dependency) {
+ assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
+ Dependency = nullptr;
+
+ // Right now we don't want to worry about LLVM's memory model. This can be
+ // made more precise later.
+ for (auto *MMO : MI->memoperands())
+ if (!MMO->isUnordered())
+ return false;
+
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg()) {
+ for (auto &RegDef : RegDefs) {
+ unsigned Reg = RegDef.first;
+ MachineInstr *MI = RegDef.second;
+ if (!TRI.regsOverlap(Reg, MO.getReg()))
+ continue;
+
+ // We found a write-after-write or read-after-write, see if the
+ // instruction causing this dependency can be hoisted too.
+
+ if (MI == getUnknownMI())
+ // We don't have precise dependency information.
+ return false;
+
+ if (Dependency) {
+ if (Dependency == MI)
+ continue;
+ // We already have one dependency, and we can track only one.
+ return false;
+ }
+
+ // Now check if MI is actually a dependency that can be hoisted.
+
+ // We don't want to track transitive dependencies. We already know that
+ // MI is the only instruction that defines Reg, but we need to be sure
+ // that it does not use any registers that have been defined (trivially
+ // checked below by ensuring that there are no register uses), and that
+ // it is the only def for every register it defines (otherwise we could
+ // violate a write after write hazard).
+ auto IsMIOperandSafe = [&](MachineOperand &MO) {
+ if (!MO.isReg() || !MO.getReg())
+ return true;
+ if (MO.isUse())
+ return false;
+ assert((!MO.isDef() || RegDefs.count(MO.getReg())) &&
+ "All defs must be tracked in RegDefs by now!");
+ return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI;
+ };
+
+ if (!all_of(MI->operands(), IsMIOperandSafe))
+ return false;
+
+ // Now check for speculation safety:
+ bool SawStore = true;
+ if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad())
+ return false;
+
+ Dependency = MI;
+ }
+
+ if (MO.isDef())
+ for (unsigned Reg : RegUses)
+ if (TRI.regsOverlap(Reg, MO.getReg()))
+ return false; // We found a write-after-read
+ }
+ }
+
+ return true;
+}
+
+bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getRegInfo().getTargetRegisterInfo();
+ MMI = &MF.getMMI();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ SmallVector<NullCheck, 16> NullCheckList;
+
+ for (auto &MBB : MF)
+ analyzeBlockForNullChecks(MBB, NullCheckList);
+
+ if (!NullCheckList.empty())
+ rewriteNullChecks(NullCheckList);
+
+ return !NullCheckList.empty();
+}
+
+// Return true if any register aliasing \p Reg is live-in into \p MBB.
+static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
+ MachineBasicBlock *MBB, unsigned Reg) {
+ for (MCRegAliasIterator AR(Reg, TRI, /*IncludeSelf*/ true); AR.isValid();
+ ++AR)
+ if (MBB->isLiveIn(*AR))
+ return true;
+ return false;
+}
+
+/// Analyze MBB to check if its terminating branch can be turned into an
+/// implicit null check. If yes, append a description of the said null check to
+/// NullCheckList and return true, else return false.
+bool ImplicitNullChecks::analyzeBlockForNullChecks(
+ MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) {
+ typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+
+ MDNode *BranchMD = nullptr;
+ if (auto *BB = MBB.getBasicBlock())
+ BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit);
+
+ if (!BranchMD)
+ return false;
+
+ MachineBranchPredicate MBP;
+
+ if (TII->analyzeBranchPredicate(MBB, MBP, true))
+ return false;
+
+ // Is the predicate comparing an integer to zero?
+ if (!(MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+ (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+ MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
+ return false;
+
+ // If we cannot erase the test instruction itself, then making the null check
+ // implicit does not buy us much.
+ if (!MBP.SingleUseCondition)
+ return false;
+
+ MachineBasicBlock *NotNullSucc, *NullSucc;
+
+ if (MBP.Predicate == MachineBranchPredicate::PRED_NE) {
+ NotNullSucc = MBP.TrueDest;
+ NullSucc = MBP.FalseDest;
+ } else {
+ NotNullSucc = MBP.FalseDest;
+ NullSucc = MBP.TrueDest;
+ }
+
+ // We handle the simplest case for now. We can potentially do better by using
+ // the machine dominator tree.
+ if (NotNullSucc->pred_size() != 1)
+ return false;
+
+ // Starting with a code fragment like:
+ //
+ // test %RAX, %RAX
+ // jne LblNotNull
+ //
+ // LblNull:
+ // callq throw_NullPointerException
+ //
+ // LblNotNull:
+ // Inst0
+ // Inst1
+ // ...
+ // Def = Load (%RAX + <offset>)
+ // ...
+ //
+ //
+ // we want to end up with
+ //
+ // Def = FaultingLoad (%RAX + <offset>), LblNull
+ // jmp LblNotNull ;; explicit or fallthrough
+ //
+ // LblNotNull:
+ // Inst0
+ // Inst1
+ // ...
+ //
+ // LblNull:
+ // callq throw_NullPointerException
+ //
+ //
+ // To see why this is legal, consider the two possibilities:
+ //
+ // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the
+ // load instruction dereferences the null page, causing a segmentation
+ // fault.
+ //
+ // 2. %RAX is not null: in this case we know that the load cannot fault, as
+ // otherwise the load would've faulted in the original program too and the
+ // original program would've been undefined.
+ //
+ // This reasoning cannot be extended to justify hoisting through arbitrary
+ // control flow. For instance, in the example below (in pseudo-C)
+ //
+ // if (ptr == null) { throw_npe(); unreachable; }
+ // if (some_cond) { return 42; }
+ // v = ptr->field; // LD
+ // ...
+ //
+ // we cannot (without code duplication) use the load marked "LD" to null check
+ // ptr -- clause (2) above does not apply in this case. In the above program
+ // the safety of ptr->field can be dependent on some_cond; and, for instance,
+ // ptr could be some non-null invalid reference that never gets loaded from
+ // because some_cond is always true.
+
+ unsigned PointerReg = MBP.LHS.getReg();
+
+ HazardDetector HD(*TRI, *AA);
+
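+  // Scan forward through NotNullSucc for a load based on PointerReg with an
+  // offset within the first page that can be hoisted up to the null check.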
+ for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
+ ++MII) {
+ MachineInstr &MI = *MII;
+ unsigned BaseReg;
+ int64_t Offset;
+ MachineInstr *Dependency = nullptr;
+ if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg &&
+ Offset < PageSize && MI.getDesc().getNumDefs() <= 1 &&
+ HD.isSafeToHoist(&MI, Dependency)) {
+
+ auto DependencyOperandIsOk = [&](MachineOperand &MO) {
+ assert(!(MO.isReg() && MO.isUse()) &&
+                 "No transitive dependencies please!");
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ return true;
+
+ // Make sure that we won't clobber any live ins to the sibling block
+ // by hoisting Dependency. For instance, we can't hoist INST to
+ // before the null check (even if it safe, and does not violate any
+ // dependencies in the non_null_block) if %rdx is live in to
+ // _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx<def> = INST
+ // ...
+ if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg()))
+ return false;
+
+ // Make sure Dependency isn't re-defining the base register. Then we
+ // won't get the memory operation on the address we want.
+ if (TRI->regsOverlap(MO.getReg(), BaseReg))
+ return false;
+
+ return true;
+ };
+
+ bool DependencyOperandsAreOk =
+ !Dependency ||
+ all_of(Dependency->operands(), DependencyOperandIsOk);
+
+ if (DependencyOperandsAreOk) {
+ NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc, Dependency);
+ return true;
+ }
+ }
+
+ HD.rememberInstruction(&MI);
+ if (HD.isClobbered())
+ return false;
+ }
+
+ return false;
+}
+
+/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
+/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
+/// (defining the same register), and branches to HandlerMBB if the load
+/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
+MachineInstr *
+ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB) {
+ const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
+ // all targets.
+
+ DebugLoc DL;
+ unsigned NumDefs = LoadMI->getDesc().getNumDefs();
+ assert(NumDefs <= 1 && "other cases unhandled!");
+
+ unsigned DefReg = NoRegister;
+ if (NumDefs != 0) {
+ DefReg = LoadMI->defs().begin()->getReg();
+ assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ "expected exactly one def!");
+ }
+
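+  // FAULTING_LOAD_OP takes the handler block and the original load's opcode
+  // as operands, followed by the original load's use operands.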
+ auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
+ .addMBB(HandlerMBB)
+ .addImm(LoadMI->getOpcode());
+
+ for (auto &MO : LoadMI->uses())
+ MIB.addOperand(MO);
+
+ MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end());
+
+ return MIB;
+}
+
+/// Rewrite the null checks in NullCheckList into implicit null checks.
+void ImplicitNullChecks::rewriteNullChecks(
+ ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) {
+ DebugLoc DL;
+
+ for (auto &NC : NullCheckList) {
+ // Remove the conditional branch dependent on the null check.
+ unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock());
+ (void)BranchesRemoved;
+ assert(BranchesRemoved > 0 && "expected at least one branch!");
+
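+    // If the memory operation has a single dependency, hoist that instruction
+    // into the check block so it executes before the faulting load.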
+ if (auto *DepMI = NC.getOnlyDependency()) {
+ DepMI->removeFromParent();
+ NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
+ }
+
+    // Insert a faulting load where the conditional branch was originally. The
+    // check we did earlier ensures that this bit of code motion is legal. We
+    // do not touch the successors list for any basic block since we haven't
+    // changed control flow; we've just made it implicit.
+ MachineInstr *FaultingLoad = insertFaultingLoad(
+ NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
+ // Now the values defined by MemOperation, if any, are live-in of
+ // the block of MemOperation.
+ // The original load operation may define implicit-defs alongside
+ // the loaded value.
+ MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
+ for (const MachineOperand &MO : FaultingLoad->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || MBB->isLiveIn(Reg))
+ continue;
+ MBB->addLiveIn(Reg);
+ }
+
+ if (auto *DepMI = NC.getOnlyDependency()) {
+ for (auto &MO : DepMI->operands()) {
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ continue;
+ if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
+ NC.getNotNullSucc()->addLiveIn(MO.getReg());
+ }
+ }
+
+ NC.getMemOperation()->eraseFromParent();
+ NC.getCheckOperation()->eraseFromParent();
+
+ // Insert an *unconditional* branch to not-null successor.
+ TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
+ /*Cond=*/None, DL);
+
+ NumImplicitNullChecks++;
+ }
+}
+
+char ImplicitNullChecks::ID = 0;
+char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
+INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
+ "Implicit null checks", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
+ "Implicit null checks", false, false)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 000000000000..197db777dd2c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,1456 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Spiller.h"
+#include "SplitKit.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
+STATISTIC(NumSnippets, "Number of spilled snippets");
+STATISTIC(NumSpills, "Number of spills inserted");
+STATISTIC(NumSpillsRemoved, "Number of spills removed");
+STATISTIC(NumReloads, "Number of reloads inserted");
+STATISTIC(NumReloadsRemoved, "Number of reloads removed");
+STATISTIC(NumFolded, "Number of folded stack accesses");
+STATISTIC(NumFoldedLoads, "Number of folded loads");
+STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
+
+static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
+ cl::desc("Disable inline spill hoisting"));
+
+namespace {
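+/// HoistSpillHelper records spills of equal values to the same stack slot
+/// (mergeable spills) and, after spilling, tries to hoist them to a common
+/// dominating location; see hoistAllSpills().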
+class HoistSpillHelper : private LiveRangeEdit::Delegate {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ InsertPointAnalysis IPA;
+
+ // Map from StackSlot to its original register.
+ DenseMap<int, unsigned> StackSlotToReg;
+ // Map from pair of (StackSlot and Original VNI) to a set of spills which
+ // have the same stackslot and have equal values defined by Original VNI.
+  // These spills are mergeable and are hoist candidates.
+ typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
+ MergeableSpillsMap;
+ MergeableSpillsMap MergeableSpills;
+
+ /// This is the map from original register to a set containing all its
+ /// siblings. To hoist a spill to another BB, we need to find out a live
+ /// sibling there and use it as the source of the new spill.
+ DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap;
+
+ bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB,
+ unsigned &LiveReg);
+
+ void rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill);
+
+ void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI,
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns);
+
+public:
+ HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ IPA(LIS, mf.getNumBlockIDs()) {}
+
+ void addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+ unsigned Original);
+ bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot);
+ void hoistAllSpills();
+ void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+};
+
+class InlineSpiller : public Spiller {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveRangeEdit *Edit;
+ LiveInterval *StackInt;
+ int StackSlot;
+ unsigned Original;
+
+ // All registers to spill to StackSlot, including the main register.
+ SmallVector<unsigned, 8> RegsToSpill;
+
+ // All COPY instructions to/from snippets.
+ // They are ignored since both operands refer to the same stack slot.
+ SmallPtrSet<MachineInstr*, 8> SnippetCopies;
+
+ // Values that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> UsedValues;
+
+ // Dead defs generated during spilling.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ // Object records spills information and does the hoisting.
+ HoistSpillHelper HSpiller;
+
+ ~InlineSpiller() override {}
+
+public:
+ InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()),
+ LSS(pass.getAnalysis<LiveStacks>()),
+ AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
+ MDT(pass.getAnalysis<MachineDominatorTree>()),
+ Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
+ MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ TII(*mf.getSubtarget().getInstrInfo()),
+ TRI(*mf.getSubtarget().getRegisterInfo()),
+ MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
+ HSpiller(pass, mf, vrm) {}
+
+ void spill(LiveRangeEdit &) override;
+ void postOptimization() override;
+
+private:
+ bool isSnippet(const LiveInterval &SnipLI);
+ void collectRegsToSpill();
+
+ bool isRegToSpill(unsigned Reg) {
+ return std::find(RegsToSpill.begin(),
+ RegsToSpill.end(), Reg) != RegsToSpill.end();
+ }
+
+ bool isSibling(unsigned Reg);
+ bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
+ void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
+
+ void markValueUsed(LiveInterval*, VNInfo*);
+ bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
+ void reMaterializeAll();
+
+ bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
+ MachineInstr *LoadMI = nullptr);
+ void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
+ void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
+
+ void spillAroundUses(unsigned Reg);
+ void spillAll();
+};
+}
+
+namespace llvm {
+
+Spiller::~Spiller() { }
+void Spiller::anchor() { }
+
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Snippets
+//===----------------------------------------------------------------------===//
+
+// When spilling a virtual register, we also spill any snippets it is connected
+// to. The snippets are small live ranges that only have a single real use,
+// leftovers from live range splitting. Spilling them enables memory operand
+// folding or tightens the live range around the single use.
+//
+// This minimizes register pressure and maximizes the store-to-load distance for
+// spill slots which can be important in tight loops.
+
+/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
+/// otherwise return 0.
+static unsigned isFullCopyOf(const MachineInstr &MI, unsigned Reg) {
+ if (!MI.isFullCopy())
+ return 0;
+ if (MI.getOperand(0).getReg() == Reg)
+ return MI.getOperand(1).getReg();
+ if (MI.getOperand(1).getReg() == Reg)
+ return MI.getOperand(0).getReg();
+ return 0;
+}
+
+/// isSnippet - Identify if a live interval is a snippet that should be spilled.
+/// It is assumed that SnipLI is a virtual register with the same original as
+/// Edit->getReg().
+bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
+ unsigned Reg = Edit->getReg();
+
+ // A snippet is a tiny live range with only a single instruction using it
+ // besides copies to/from Reg or spills/fills. We accept:
+ //
+ // %snip = COPY %Reg / FILL fi#
+ // %snip = USE %snip
+ // %Reg = COPY %snip / SPILL %snip, fi#
+ //
+ if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
+ return false;
+
+ MachineInstr *UseMI = nullptr;
+
+ // Check that all uses satisfy our criteria.
+ for (MachineRegisterInfo::reg_instr_nodbg_iterator
+ RI = MRI.reg_instr_nodbg_begin(SnipLI.reg),
+ E = MRI.reg_instr_nodbg_end(); RI != E; ) {
+ MachineInstr &MI = *RI++;
+
+ // Allow copies to/from Reg.
+ if (isFullCopyOf(MI, Reg))
+ continue;
+
+ // Allow stack slot loads.
+ int FI;
+ if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow stack slot stores.
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ continue;
+
+ // Allow a single additional instruction.
+ if (UseMI && &MI != UseMI)
+ return false;
+ UseMI = &MI;
+ }
+ return true;
+}
+
+/// collectRegsToSpill - Collect live range snippets that only have a single
+/// real use.
+void InlineSpiller::collectRegsToSpill() {
+ unsigned Reg = Edit->getReg();
+
+ // Main register always spills.
+ RegsToSpill.assign(1, Reg);
+ SnippetCopies.clear();
+
+ // Snippets all have the same original, so there can't be any for an original
+ // register.
+ if (Original == Reg)
+ return;
+
+ for (MachineRegisterInfo::reg_instr_iterator
+ RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
+ MachineInstr &MI = *RI++;
+ unsigned SnipReg = isFullCopyOf(MI, Reg);
+ if (!isSibling(SnipReg))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(SnipReg);
+ if (!isSnippet(SnipLI))
+ continue;
+ SnippetCopies.insert(&MI);
+ if (isRegToSpill(SnipReg))
+ continue;
+ RegsToSpill.push_back(SnipReg);
+ DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n');
+ ++NumSnippets;
+ }
+}
+
+bool InlineSpiller::isSibling(unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ VRM.getOriginal(Reg) == Original;
+}
+
+/// It is beneficial to spill to an earlier place in the same BB in a case
+/// like the following:
+/// There is an alternative def earlier in the same MBB.
+/// Hoist the spill as far as possible in SpillMBB. This can ease
+/// register pressure:
+///
+/// x = def
+/// y = use x
+/// s = copy x
+///
+/// Hoisting the spill of s to immediately after the def removes the
+/// interference between x and y:
+///
+/// x = def
+/// spill x
+/// y = use x<kill>
+///
+/// This hoist only helps when the copy kills its source.
+///
+bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
+ MachineInstr &CopyMI) {
+ SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
+#ifndef NDEBUG
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
+#endif
+
+ unsigned SrcReg = CopyMI.getOperand(1).getReg();
+ LiveInterval &SrcLI = LIS.getInterval(SrcReg);
+ VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
+ LiveQueryResult SrcQ = SrcLI.Query(Idx);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def);
+ if (DefMBB != CopyMI.getParent() || !SrcQ.isKill())
+ return false;
+
+ // Conservatively extend the stack slot range to the range of the original
+ // value. We may be able to do better with stack slot coloring by being more
+ // careful here.
+ assert(StackInt && "No stack slot assigned yet.");
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": "
+ << *StackInt << '\n');
+
+ // We are going to spill SrcVNI immediately after its def, so clear out
+ // any later spills of the same value.
+ eliminateRedundantSpills(SrcLI, SrcVNI);
+
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
+ MachineBasicBlock::iterator MII;
+ if (SrcVNI->isPHIDef())
+ MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ else {
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
+ assert(DefMI && "Defining instruction disappeared");
+ MII = DefMI;
+ ++MII;
+ }
+ // Insert spill without kill flag immediately after def.
+ TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
+ MRI.getRegClass(SrcReg), &TRI);
+ --MII; // Point to store instruction.
+ LIS.InsertMachineInstrInMaps(*MII);
+ DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
+
+ HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
+ ++NumSpills;
+ return true;
+}
+
+/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any
+/// redundant spills of this value in SLI.reg and sibling copies.
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
+ assert(VNI && "Missing value");
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(&SLI, VNI));
+ assert(StackInt && "No stack slot assigned yet.");
+
+ do {
+ LiveInterval *LI;
+ std::tie(LI, VNI) = WorkList.pop_back_val();
+ unsigned Reg = LI->reg;
+ DEBUG(dbgs() << "Checking redundant spills for "
+ << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
+
+ // Regs to spill are taken care of.
+ if (isRegToSpill(Reg))
+ continue;
+
+ // Add all of VNI's live range to StackInt.
+ StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
+
+ // Find all spills and copies of VNI.
+ for (MachineRegisterInfo::use_instr_nodbg_iterator
+ UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
+ UI != E; ) {
+ MachineInstr &MI = *UI++;
+ if (!MI.isCopy() && !MI.mayStore())
+ continue;
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ if (LI->getVNInfoAt(Idx) != VNI)
+ continue;
+
+ // Follow sibling copies down the dominator tree.
+ if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
+ if (isSibling(DstReg)) {
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ }
+ continue;
+ }
+
+ // Erase spills.
+ int FI;
+ if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) {
+ DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI);
+ // eliminateDeadDefs won't normally remove stores, so switch opcode.
+ MI.setDesc(TII.get(TargetOpcode::KILL));
+ DeadDefs.push_back(&MI);
+ ++NumSpillsRemoved;
+ if (HSpiller.rmFromMergeableSpills(MI, StackSlot))
+ --NumSpills;
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Rematerialization
+//===----------------------------------------------------------------------===//
+
+/// markValueUsed - Remember that VNI failed to rematerialize, so its defining
+/// instruction cannot be eliminated. See through snippet copies.
+void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
+ SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
+ WorkList.push_back(std::make_pair(LI, VNI));
+ do {
+ std::tie(LI, VNI) = WorkList.pop_back_val();
+ if (!UsedValues.insert(VNI).second)
+ continue;
+
+ if (VNI->isPHIDef()) {
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ for (MachineBasicBlock *P : MBB->predecessors()) {
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P));
+ if (PVNI)
+ WorkList.push_back(std::make_pair(LI, PVNI));
+ }
+ continue;
+ }
+
+ // Follow snippet copies.
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
+
+ // Analyze instruction
+ SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+
+ if (!RI.Reads)
+ return false;
+
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
+
+ if (!ParentVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(&MI))
+ return false;
+
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ LiveRangeEdit::Remat RM(ParentVNI);
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
+ return false;
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+  // Allocate a new register for the remat.
+ unsigned NewVReg = Edit->createFrom(Original);
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx =
+ Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
+ (void)DefIdx;
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewVReg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');
+
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ if (!Edit->anyRematerializable(AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false;
+ for (unsigned Reg : RegsToSpill) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (MachineRegisterInfo::reg_bundle_iterator
+ RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
+ RegI != E; ) {
+ MachineInstr &MI = *RegI++;
+
+ // Debug values are not allowed to affect codegen.
+ if (MI.isDebugValue())
+ continue;
+
+ anyRemat |= reMaterializeFor(LI, MI);
+ }
+ }
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ for (unsigned Reg : RegsToSpill) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ MI->addRegisterDead(Reg, &TRI);
+ if (!MI->allDefsAreDead())
+ continue;
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ DeadDefs.push_back(MI);
+ }
+ }
+
+ // Eliminate dead code after remat. Note that some snippet copies may be
+ // deleted here.
+ if (DeadDefs.empty())
+ return;
+ DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
+
+  // LiveRangeEdit::eliminateDeadDef is used to remove dead defining
+  // instructions after rematerialization. To remove a VNI for a vreg from its
+  // LiveInterval, LiveIntervals::removeVRegDefAt is used. However, after
+  // non-PHI VNIs are all removed, PHI VNIs are still left in the LiveInterval.
+  // So to get rid of an unused reg, we need to check whether it has any
+  // non-dbg reference rather than whether it has a non-empty interval.
+ unsigned ResultPos = 0;
+ for (unsigned Reg : RegsToSpill) {
+ if (MRI.reg_nodbg_empty(Reg)) {
+ Edit->eraseVirtReg(Reg);
+ continue;
+ }
+ assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) &&
+ "Reg with empty interval has reference");
+ RegsToSpill[ResultPos++] = Reg;
+ }
+ RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
+ DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// If MI is a load or store of StackSlot, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
+ int FI = 0;
+ unsigned InstrReg = TII.isLoadFromStackSlot(*MI, FI);
+ bool IsLoad = InstrReg;
+ if (!IsLoad)
+ InstrReg = TII.isStoreToStackSlot(*MI, FI);
+
+ // We have a stack access. Is it the right register and slot?
+ if (InstrReg != Reg || FI != StackSlot)
+ return false;
+
+ if (!IsLoad)
+ HSpiller.rmFromMergeableSpills(*MI, StackSlot);
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+
+ if (IsLoad) {
+ ++NumReloadsRemoved;
+ --NumReloads;
+ } else {
+ ++NumSpillsRemoved;
+ --NumSpills;
+ }
+
+ return true;
+}
+
+#if !defined(NDEBUG)
+// Dump the range of instructions from B to E with their slot indexes.
+static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
+ MachineBasicBlock::iterator E,
+ LiveIntervals const &LIS,
+ const char *const header,
+ unsigned VReg =0) {
+ char NextLine = '\n';
+ char SlotIndent = '\t';
+
+ if (std::next(B) == E) {
+ NextLine = ' ';
+ SlotIndent = ' ';
+ }
+
+ dbgs() << '\t' << header << ": " << NextLine;
+
+ for (MachineBasicBlock::iterator I = B; I != E; ++I) {
+ SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot();
+
+ // If a register was passed in and this instruction has it as a
+ // destination that is marked as an early clobber, print the
+ // early-clobber slot index.
+ if (VReg) {
+ MachineOperand *MO = I->findRegisterDefOperand(VReg);
+ if (MO && MO->isEarlyClobber())
+ Idx = Idx.getRegSlot(true);
+ }
+
+ dbgs() << SlotIndent << Idx << '\t' << *I;
+ }
+}
+#endif
+
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from analyzeVirtReg().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
+ bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
+ bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI->getOpcode() == TargetOpcode::STACKMAP);
+
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (const auto &OpPair : Ops) {
+ unsigned Idx = OpPair.second;
+ assert(MI == OpPair.first && "Instruction conflict during operand folding");
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
+ continue;
+ }
+ // FIXME: Teach targets to deal with subregs.
+ if (!SpillSubRegs && MO.getSubReg())
+ return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
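+  // Remember the span of instructions around MI so that any instructions the
+  // fold creates can be added to the slot index maps below.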
+ MachineInstrSpan MIS(MI);
+
+ MachineInstr *FoldMI =
+ LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
+ : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
+ if (!FoldMI)
+ return false;
+
+ // Remove LIS for any dead defs in the original MI not in FoldMI.
+ for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
+ if (!MO->isReg())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
+ MRI.isReserved(Reg)) {
+ continue;
+ }
+ // Skip non-Defs, including undef uses and internal reads.
+ if (MO->isUse())
+ continue;
+ MIBundleOperands::PhysRegInfo RI =
+ MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI);
+ if (RI.FullyDefined)
+ continue;
+ // FoldMI does not define this physreg. Remove the LI segment.
+ assert(MO->isDead() && "Cannot fold physreg def");
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+ LIS.removePhysRegDefAt(Reg, Idx);
+ }
+
+ int FI;
+ if (TII.isStoreToStackSlot(*MI, FI) &&
+ HSpiller.rmFromMergeableSpills(*MI, FI))
+ --NumSpills;
+ LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
+ MI->eraseFromParent();
+
+ // Insert any new instructions other than FoldMI into the LIS maps.
+ assert(!MIS.empty() && "Unexpected empty span of instructions!");
+ for (MachineInstr &MI : MIS)
+ if (&MI != FoldMI)
+ LIS.InsertMachineInstrInMaps(MI);
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
+ "folded"));
+
+ if (!WasCopy)
+ ++NumFolded;
+ else if (Ops.front().second == 0) {
+ ++NumSpills;
+ HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
+ } else
+ ++NumReloads;
+ return true;
+}
+
+void InlineSpiller::insertReload(unsigned NewVReg,
+ SlotIndex Idx,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ MachineInstrSpan MIS(MI);
+ TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI);
+
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI);
+
+ DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload",
+ NewVReg));
+ ++NumReloads;
+}
+
+/// insertSpill - Insert a spill of NewVReg after MI.
+void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+
+ MachineInstrSpan MIS(MI);
+ TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
+ MRI.getRegClass(NewVReg), &TRI);
+
+ LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
+
+ DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
+ "spill"));
+ ++NumSpills;
+ HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original);
+}
+
+/// spillAroundUses - insert spill code around each use of Reg.
+void InlineSpiller::spillAroundUses(unsigned Reg) {
+ DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n');
+ LiveInterval &OldLI = LIS.getInterval(Reg);
+
+ // Iterate over instructions using Reg.
+ for (MachineRegisterInfo::reg_bundle_iterator
+ RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
+ RegI != E; ) {
+ MachineInstr *MI = &*(RegI++);
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ bool IsIndirect = MI->isIndirectDebugValue();
+ uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(StackSlot)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ continue;
+ }
+
+ // Ignore copies to/from snippets. We'll delete them.
+ if (SnippetCopies.count(MI))
+ continue;
+
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI, Reg))
+ continue;
+
+ // Analyze instruction.
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops);
+
+ // Find the slot index where this instruction reads and writes OldLI.
+ // This is usually the def slot, except for tied early clobbers.
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
+ if (SlotIndex::isSameInstr(Idx, VNI->def))
+ Idx = VNI->def;
+
+ // Check for a sibling copy.
+ unsigned SibReg = isFullCopyOf(*MI, Reg);
+ if (SibReg && isSibling(SibReg)) {
+ // This may actually be a copy between snippets.
+ if (isRegToSpill(SibReg)) {
+ DEBUG(dbgs() << "Found new snippet copy: " << *MI);
+ SnippetCopies.insert(MI);
+ continue;
+ }
+ if (RI.Writes) {
+ if (hoistSpillInsideBB(OldLI, *MI)) {
+ // This COPY is now dead, the value is already in the stack slot.
+ MI->getOperand(0).setIsDead();
+ DeadDefs.push_back(MI);
+ continue;
+ }
+ } else {
+ // This is a reload for a sib-reg copy. Drop spills downstream.
+ LiveInterval &SibLI = LIS.getInterval(SibReg);
+ eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx));
+ // The COPY will fold to a reload below.
+ }
+ }
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(Ops))
+ continue;
+
+ // Create a new virtual register for spill/fill.
+ // FIXME: Infer regclass from instruction alone.
+ unsigned NewVReg = Edit->createFrom(Reg);
+
+ if (RI.Reads)
+ insertReload(NewVReg, Idx, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
+ MO.setReg(NewVReg);
+ if (MO.isUse()) {
+ if (!OpPair.first->isRegTiedToDefOperand(OpPair.second))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+ DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (RI.Writes)
+ if (hasLiveDef)
+ insertSpill(NewVReg, true, MI);
+ }
+}
+
+/// spillAll - Spill all registers remaining after rematerialization.
+void InlineSpiller::spillAll() {
+ // Update LiveStacks now that we are committed to spilling.
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
+ StackSlot = VRM.assignVirt2StackSlot(Original);
+ StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
+ } else
+ StackInt = &LSS.getInterval(StackSlot);
+
+ if (Original != Edit->getReg())
+ VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot);
+
+ assert(StackInt->getNumValNums() == 1 && "Bad stack interval values");
+ for (unsigned Reg : RegsToSpill)
+ StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg),
+ StackInt->getValNumInfo(0));
+ DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n');
+
+ // Spill around uses of all RegsToSpill.
+ for (unsigned Reg : RegsToSpill)
+ spillAroundUses(Reg);
+
+ // Hoisted spills may cause dead code.
+ if (!DeadDefs.empty()) {
+ DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
+ }
+
+ // Finally delete the SnippetCopies.
+ for (unsigned Reg : RegsToSpill) {
+ for (MachineRegisterInfo::reg_instr_iterator
+ RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
+ RI != E; ) {
+ MachineInstr &MI = *(RI++);
+ assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
+ // FIXME: Do this with a LiveRangeEdit callback.
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ }
+ }
+
+ // Delete all spilled registers.
+ for (unsigned Reg : RegsToSpill)
+ Edit->eraseVirtReg(Reg);
+}
+
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ ++NumSpilledRanges;
+ Edit = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ // Share a stack slot among all descendants of Original.
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = nullptr;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
+ << ':' << edit.getParent()
+ << "\nFrom original " << PrintReg(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, Loops, MBFI);
+}
+
+/// Optimizations after all the reg selections and spills are done.
+///
+void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
+
+/// When a spill is inserted, add the spill to MergeableSpills map.
+///
+void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+ unsigned Original) {
+ StackSlotToReg[StackSlot] = Original;
+ SlotIndex Idx = LIS.getInstructionIndex(Spill);
+ VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ MergeableSpills[MIdx].insert(&Spill);
+}
+
+/// When a spill is removed, remove the spill from MergeableSpills map.
+/// Return true if the spill is removed successfully.
+///
+bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
+ int StackSlot) {
+ int Original = StackSlotToReg[StackSlot];
+ if (!Original)
+ return false;
+ SlotIndex Idx = LIS.getInstructionIndex(Spill);
+ VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+ std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+ return MergeableSpills[MIdx].erase(&Spill);
+}
+
+/// Check BB to see if it is a possible target BB to place a hoisted spill,
+/// i.e., there should be a live sibling of OrigReg at the insert point.
+///
+bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
+ MachineBasicBlock &BB, unsigned &LiveReg) {
+ SlotIndex Idx;
+ LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
+ if (MI != BB.end())
+ Idx = LIS.getInstructionIndex(*MI);
+ else
+ Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
+ SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
+ assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
+ "Unexpected VNI");
+
+ for (auto const SibReg : Siblings) {
+ LiveInterval &LI = LIS.getInterval(SibReg);
+ VNInfo *VNI = LI.getVNInfoAt(Idx);
+ if (VNI) {
+ LiveReg = SibReg;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Remove redundant spills in the same BB. Save those redundant spills in
+/// SpillsToRm, and save the spill to keep and its BB in SpillBBToSpill map.
+///
+void HoistSpillHelper::rmRedundantSpills(
+ SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // For each spill seen, check SpillBBToSpill[] to see if its BB already has
+ // another spill inside. If a BB contains more than one spill, only keep the
+ // earlier spill with the smaller SlotIndex.
+ for (const auto CurrentSpill : Spills) {
+ MachineBasicBlock *Block = CurrentSpill->getParent();
+ MachineDomTreeNode *Node = MDT.DT->getNode(Block);
+ MachineInstr *PrevSpill = SpillBBToSpill[Node];
+ if (PrevSpill) {
+ SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
+ SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
+ MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
+ MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
+ SpillsToRm.push_back(SpillToRm);
+ SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
+ } else {
+ SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
+ }
+ }
+ for (const auto SpillToRm : SpillsToRm)
+ Spills.erase(SpillToRm);
+}
+
+/// Starting from \p Root find a top-down traversal order of the dominator
+/// tree to visit all basic blocks containing the elements of \p Spills.
+/// Redundant spills will be found and put into \p SpillsToRm at the same
+/// time. \p SpillBBToSpill will be populated as part of the process and
+/// maps a basic block to the first store occurring in the basic block.
+/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
+///
+void HoistSpillHelper::getVisitOrders(
+ MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineDomTreeNode *> &Orders,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+ DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+ // The set contains all the possible BB nodes to which we may hoist
+ // original spills.
+ SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
+ // Save the BB nodes on the path from the first BB node containing a
+ // non-redundant spill to the Root node.
+ SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
+ // All the spills to be hoisted must originate from a single def instruction
+ // to the OrigReg. It means the def instruction should dominate all the spills
+ // to be hoisted. We choose the BB where the def instruction is located as
+ // the Root.
+ MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
+ // For every node on the dominator tree with a spill, walk up the dominator
+ // tree towards the Root node until it is reached. If another node containing
+ // a spill is found on the path, the spill in the dominated block is redundant
+ // and is recorded for removal. Otherwise, all the nodes on the path from the
+ // first node with a non-redundant spill to the Root node are added to the
+ // WorkSet, which will contain all the possible locations where spills may be
+ // hoisted to after the loop below is done.
+ for (const auto Spill : Spills) {
+ MachineBasicBlock *Block = Spill->getParent();
+ MachineDomTreeNode *Node = MDT[Block];
+ MachineInstr *SpillToRm = nullptr;
+ while (Node != RootIDomNode) {
+ // If Node dominates Block, and it already contains a spill, the spill in
+ // Block will be redundant.
+ if (Node != MDT[Block] && SpillBBToSpill[Node]) {
+ SpillToRm = SpillBBToSpill[MDT[Block]];
+ break;
+ // If we see the Node already in WorkSet, the path from the Node to
+ // the Root node must already have been traversed by another spill.
+ // There is then no need to repeat the walk.
+ } else if (WorkSet.count(Node)) {
+ break;
+ } else {
+ NodesOnPath.insert(Node);
+ }
+ Node = Node->getIDom();
+ }
+ if (SpillToRm) {
+ SpillsToRm.push_back(SpillToRm);
+ } else {
+ // Add a BB containing the original spills to SpillsToKeep -- i.e.,
+ // set the initial status before hoisting starts. The value for BBs
+ // containing original spills is set to 0, to distinguish them from
+ // BBs containing hoisted spills, which will be inserted into
+ // SpillsToKeep later during hoisting.
+ SpillsToKeep[MDT[Block]] = 0;
+ WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
+ }
+ NodesOnPath.clear();
+ }
+
+ // Sort the nodes in WorkSet in top-down order and save the nodes
+ // in Orders. Orders will be used for hoisting in runHoistSpills.
+ unsigned idx = 0;
+ Orders.push_back(MDT.DT->getNode(Root));
+ do {
+ MachineDomTreeNode *Node = Orders[idx++];
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ if (WorkSet.count(Child))
+ Orders.push_back(Child);
+ }
+ } while (idx != Orders.size());
+ assert(Orders.size() == WorkSet.size() &&
+ "Orders and WorkSet have different sizes");
+
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++)
+ DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
+ DEBUG(dbgs() << "\n");
+#endif
+}
+
+/// Try to hoist spills according to BB hotness. The spills to be removed will
+/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
+/// \p SpillsToIns.
+///
+void HoistSpillHelper::runHoistSpills(
+ unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
+ SmallVectorImpl<MachineInstr *> &SpillsToRm,
+ DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
+ // Visit order of dominator tree nodes.
+ SmallVector<MachineDomTreeNode *, 32> Orders;
+ // SpillsToKeep contains all the nodes where spills are to be inserted
+ // during hoisting. If the spill to be inserted is an original spill
+ // (not a hoisted one), the value of the map entry is 0. If the spill
+ // is a hoisted spill, the value of the map entry is the VReg to be used
+ // as the source of the spill.
+ DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
+ // Map from BB to the first spill inside of it.
+ DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
+
+ rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
+
+ MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
+ getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
+ SpillBBToSpill);
+
+ // SpillsInSubTreeMap maps a dom tree node to a pair of a node set and the
+ // cost of all the spills inside those nodes.
+ // The node set contains the locations where spills are to be inserted
+ // in the subtree of the current node.
+ typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
+ NodesCostPair;
+ DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+ // Iterate Orders set in reverse order, which will be a bottom-up order
+ // in the dominator tree. Once we visit a dom tree node, we know its
+ // children have already been visited and the spill locations in the
+ // subtrees of all the children have been determined.
+ SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+ for (; RIt != Orders.rend(); RIt++) {
+ MachineBasicBlock *Block = (*RIt)->getBlock();
+
+ // If Block contains an original spill, simply continue.
+ if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
+ SpillsInSubTreeMap[*RIt].first.insert(*RIt);
+ // SpillsInSubTreeMap[*RIt].second contains the cost of spill.
+ SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
+ continue;
+ }
+
+ // Collect spills in subtree of current node (*RIt) to
+ // SpillsInSubTreeMap[*RIt].first.
+ const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
+ unsigned NumChildren = Children.size();
+ for (unsigned i = 0; i != NumChildren; ++i) {
+ MachineDomTreeNode *Child = Children[i];
+ if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
+ continue;
+ // The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
+ // should be placed before getting the begin and end iterators of
+ // SpillsInSubTreeMap[Child].first, or else the iterators may be
+ // invalidated when SpillsInSubTreeMap[*RIt] is seen the first time
+ // and the map grows and then the original buckets in the map are moved.
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ SubTreeCost += SpillsInSubTreeMap[Child].second;
+ auto BI = SpillsInSubTreeMap[Child].first.begin();
+ auto EI = SpillsInSubTreeMap[Child].first.end();
+ SpillsInSubTree.insert(BI, EI);
+ SpillsInSubTreeMap.erase(Child);
+ }
+
+ SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+ SpillsInSubTreeMap[*RIt].first;
+ BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+ // No spills in subtree, simply continue.
+ if (SpillsInSubTree.empty())
+ continue;
+
+ // Check whether Block is a possible candidate to insert spill.
+ unsigned LiveReg = 0;
+ if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
+ continue;
+
+ // If there are multiple spills that could be merged, bias a little
+ // toward hoisting the spill.
+ BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
+ ? BranchProbability(9, 10)
+ : BranchProbability(1, 1);
+ if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
+ // Hoist: Move spills to current Block.
+ for (const auto SpillBB : SpillsInSubTree) {
+ // When SpillBB is a BB that contains an original spill, add that
+ // spill to SpillsToRm.
+ if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
+ !SpillsToKeep[SpillBB]) {
+ MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
+ SpillsToRm.push_back(SpillToRm);
+ }
+ // SpillBB will no longer contain a spill, so remove it from SpillsToKeep.
+ SpillsToKeep.erase(SpillBB);
+ }
+ // Current Block is the BB containing the new hoisted spill. Add it to
+ // SpillsToKeep. LiveReg is the source of the new spill.
+ SpillsToKeep[*RIt] = LiveReg;
+ DEBUG({
+ dbgs() << "spills in BB: ";
+ for (const auto Rspill : SpillsInSubTree)
+ dbgs() << Rspill->getBlock()->getNumber() << " ";
+ dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
+ << "\n";
+ });
+ SpillsInSubTree.clear();
+ SpillsInSubTree.insert(*RIt);
+ SubTreeCost = MBFI.getBlockFreq(Block);
+ }
+ }
+ // For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
+ // save them to SpillsToIns.
+ for (const auto Ent : SpillsToKeep) {
+ if (Ent.second)
+ SpillsToIns[Ent.first->getBlock()] = Ent.second;
+ }
+}
+
+/// For spills with equal values, remove redundant spills and hoist the
+/// remaining ones to colder spots.
+///
+/// Spills with equal values are collected into the same set in
+/// MergeableSpills when each spill is inserted. These equal spills originate
+/// from the same defining instruction and are dominated by it.
+/// Before hoisting all the equal spills, redundant spills inside the same
+/// BB are first marked to be deleted. Then, starting from the remaining
+/// spills, walk up the dominator tree towards the Root node where the
+/// defining instruction is located, mark the dominated spills to be deleted
+/// along the way, and collect the BB nodes on the path from non-dominated
+/// spills to the defining instruction into a WorkSet. The nodes in WorkSet
+/// are the candidate places where we consider hoisting the spills. We iterate
+/// the WorkSet in bottom-up order and, for each node, decide whether to hoist
+/// the spills inside its subtree to that node. In this way, we can still get
+/// a local benefit even if hoisting all the equal spills to one cold place is
+/// impossible.
+///
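+/// For example, if BB1 and BB2 each contain a spill of the same value and are
+/// both dominated by a colder block BB0, the two spills are replaced by a
+/// single spill in BB0 when the combined block frequency of BB1 and BB2
+/// outweighs that of BB0 (with a small bias toward merging) and a live
+/// sibling register is available at the insert point in BB0.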
+void HoistSpillHelper::hoistAllSpills() {
+ SmallVector<unsigned, 4> NewVRegs;
+ LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
+
+ // Save the mapping between stackslot and its original reg.
+ DenseMap<int, unsigned> SlotToOrigReg;
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ int Slot = VRM.getStackSlot(Reg);
+ if (Slot != VirtRegMap::NO_STACK_SLOT)
+ SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
+ unsigned Original = VRM.getPreSplitReg(Reg);
+ if (!MRI.def_empty(Reg))
+ Virt2SiblingsMap[Original].insert(Reg);
+ }
+
+ // Each entry in MergeableSpills contains a spill set with equal values.
+ for (auto &Ent : MergeableSpills) {
+ int Slot = Ent.first.first;
+ unsigned OrigReg = SlotToOrigReg[Slot];
+ LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+ VNInfo *OrigVNI = Ent.first.second;
+ SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
+ if (Ent.second.empty())
+ continue;
+
+ DEBUG({
+ dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
+ << "Equal spills in BB: ";
+ for (const auto spill : EqValSpills)
+ dbgs() << spill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // SpillsToRm is the spill set to be removed from EqValSpills.
+ SmallVector<MachineInstr *, 16> SpillsToRm;
+ // SpillsToIns is the spill set to be newly inserted after hoisting.
+ DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
+
+ runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+
+ DEBUG({
+ dbgs() << "Finally inserted spills in BB: ";
+ for (const auto Ispill : SpillsToIns)
+ dbgs() << Ispill.first->getNumber() << " ";
+ dbgs() << "\nFinally removed spills in BB: ";
+ for (const auto Rspill : SpillsToRm)
+ dbgs() << Rspill->getParent()->getNumber() << " ";
+ dbgs() << "\n";
+ });
+
+ // Stack live range update.
+ LiveInterval &StackIntvl = LSS.getInterval(Slot);
+ if (!SpillsToIns.empty() || !SpillsToRm.empty())
+ StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
+ StackIntvl.getValNumInfo(0));
+
+ // Insert hoisted spills.
+ for (auto const Insert : SpillsToIns) {
+ MachineBasicBlock *BB = Insert.first;
+ unsigned LiveReg = Insert.second;
+ MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
+ TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
+ MRI.getRegClass(LiveReg), &TRI);
+ LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
+ ++NumSpills;
+ }
+
+ // Remove redundant spills or change them to dead instructions.
+ NumSpills -= SpillsToRm.size();
+ for (auto const RMEnt : SpillsToRm) {
+ RMEnt->setDesc(TII.get(TargetOpcode::KILL));
+ for (unsigned i = RMEnt->getNumOperands(); i; --i) {
+ MachineOperand &MO = RMEnt->getOperand(i - 1);
+ if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
+ RMEnt->RemoveOperand(i - 1);
+ }
+ }
+ Edit.eliminateDeadDefs(SpillsToRm, None, AA);
+ }
+}
+
+/// For VirtReg clone, the \p New register should have the same physreg or
+/// stackslot as the \p old register.
+void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ if (VRM.hasPhys(Old))
+ VRM.assignVirt2Phys(New, VRM.getPhys(Old));
+ else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
+ VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
+ else
+ llvm_unreachable("VReg should be assigned either physreg or stackslot");
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 000000000000..f8cc24724580
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,250 @@
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InterferenceCache.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Static member used for null interference cursors.
+const InterferenceCache::BlockInterference
+ InterferenceCache::Cursor::NoInterference;
+
+// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
+// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
+// reg files). Calloced memory is used for good form, and it quiets tools like
+// Valgrind too, but zero-initialized memory is not required by the algorithm:
+// this is because PhysRegEntries works like a SparseSet and its entries are
+// only valid when there is a corresponding CacheEntries assignment. There is
+// also support for pass managers being reused with targets that have different
+// numbers of PhysRegs: in that case PhysRegEntries is freed and reinitialized.
+void InterferenceCache::reinitPhysRegEntries() {
+ if (PhysRegEntriesCount == TRI->getNumRegs()) return;
+ free(PhysRegEntries);
+ PhysRegEntriesCount = TRI->getNumRegs();
+ PhysRegEntries = (unsigned char*)
+ calloc(PhysRegEntriesCount, sizeof(unsigned char));
+}
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ reinitPhysRegEntries();
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes, lis);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate(LIUArray, TRI);
+ return &Entries[E];
+ }
+ // No valid entry exists, pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
+}
+
+/// revalidate - LIU contents have changed, update tags.
+void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ // Invalidate all block entries.
+ ++Tag;
+ // Invalidate all iterators.
+ PrevPos = SlotIndex();
+ unsigned i = 0;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
+ RegUnits[i].VirtTag = LIUArray[*Units].getTag();
+}
+
+void InterferenceCache::Entry::reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
+ // LIU's changed, invalidate cache.
+ ++Tag;
+ PhysReg = physReg;
+ Blocks.resize(MF->getNumBlockIDs());
+
+ // Reset iterators.
+ PrevPos = SlotIndex();
+ RegUnits.clear();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ RegUnits.push_back(LIUArray[*Units]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ }
+}
+
+bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI) {
+ unsigned i = 0, e = RegUnits.size();
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ if (i == e)
+ return false;
+ if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
+ return false;
+ }
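+ // All tags matched; the entry is only valid if the number of register units
+ // is also unchanged.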
+ return i == e;
+}
+
+void InterferenceCache::Entry::update(unsigned MBBNum) {
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+
+ // Move the iterators to Start; use advanceTo only when moving forward.
+ if (PrevPos != Start) {
+ if (!PrevPos.isValid() || Start < PrevPos) {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.find(Start);
+ RUI.FixedI = RUI.Fixed->find(Start);
+ }
+ } else {
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ RegUnitInfo &RUI = RegUnits[i];
+ RUI.VirtI.advanceTo(Start);
+ if (RUI.FixedI != RUI.Fixed->end())
+ RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start);
+ }
+ }
+ PrevPos = Start;
+ }
+
+ MachineFunction::const_iterator MFI =
+ MF->getBlockNumbered(MBBNum)->getIterator();
+ BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
+ for (;;) {
+ BI->Tag = Tag;
+ BI->First = BI->Last = SlotIndex();
+
+ // Check for first interference from virtregs.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid())
+ continue;
+ SlotIndex StartI = I.start();
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Same thing for fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::const_iterator I = RegUnits[i].FixedI;
+ LiveInterval::const_iterator E = RegUnits[i].Fixed->end();
+ if (I == E)
+ continue;
+ SlotIndex StartI = I->start;
+ if (StartI >= Stop)
+ continue;
+ if (!BI->First.isValid() || StartI < BI->First)
+ BI->First = StartI;
+ }
+
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
+ PrevPos = Stop;
+ if (BI->First.isValid())
+ break;
+
+ // No interference in this block? Go ahead and precompute the next block.
+ if (++MFI == MF->end())
+ return;
+ MBBNum = MFI->getNumber();
+ BI = &Blocks[MBBNum];
+ if (BI->Tag == Tag)
+ return;
+ std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+ }
+
+ // Check for last interference in block.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI;
+ if (!I.valid() || I.start() >= Stop)
+ continue;
+ I.advanceTo(Stop);
+ bool Backup = !I.valid() || I.start() >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I.stop();
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Fixed interference.
+ for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) {
+ LiveInterval::iterator &I = RegUnits[i].FixedI;
+ LiveRange *LR = RegUnits[i].Fixed;
+ if (I == LR->end() || I->start >= Stop)
+ continue;
+ I = LR->advanceTo(I, Stop);
+ bool Backup = I == LR->end() || I->start >= Stop;
+ if (Backup)
+ --I;
+ SlotIndex StopI = I->end;
+ if (!BI->Last.isValid() || StopI > BI->Last)
+ BI->Last = StopI;
+ if (Backup)
+ ++I;
+ }
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h
new file mode 100644
index 000000000000..18aa5c7c5ad6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h
@@ -0,0 +1,238 @@
+//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference from LiveIntervalUnions,
+// fixed RegUnit interference, and register masks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H
+
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+
+namespace llvm {
+
+class LiveIntervals;
+
+class LLVM_LIBRARY_VISIBILITY InterferenceCache {
+ const TargetRegisterInfo *TRI;
+ LiveIntervalUnion *LIUArray;
+ MachineFunction *MF;
+
+ /// BlockInterference - information about the interference in a single basic
+ /// block.
+ struct BlockInterference {
+ BlockInterference() : Tag(0) {}
+ unsigned Tag;
+ SlotIndex First;
+ SlotIndex Last;
+ };
+
+ /// Entry - A cache entry containing interference information for all aliases
+ /// of PhysReg in all basic blocks.
+ class Entry {
+ /// PhysReg - The register currently represented.
+ unsigned PhysReg;
+
+ /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
+ /// change.
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
+
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
+
+ /// Fixed interference in RegUnit.
+ LiveRange *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU)
+ : VirtTag(LIU.getTag()), Fixed(nullptr) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+ /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ LIS = lis;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register; that would use too
+ // much memory. Instead, a fixed number of cache entries are used in a round-
+ // robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ unsigned char* PhysRegEntries;
+ size_t PhysRegEntriesCount;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache()
+ : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr),
+ PhysRegEntriesCount(0), RoundRobin(0) {}
+
+ ~InterferenceCache() {
+ free(PhysRegEntries);
+ }
+
+ void reinitPhysRegEntries();
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
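+ ///
+ /// A typical query (Cache is an InterferenceCache, PhysReg a physical
+ /// register, MBBNum a block number):
+ ///   Cursor Cur;
+ ///   Cur.setPhysReg(Cache, PhysReg);
+ ///   Cur.moveToBlock(MBBNum);
+ ///   if (Cur.hasInterference())
+ ///     // interference covers [Cur.first(), Cur.last()] in this block.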
+ class Cursor {
+ Entry *CacheEntry;
+ const BlockInterference *Current;
+ static const BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = nullptr;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(nullptr), Current(nullptr) {}
+ ~Cursor() { setEntry(nullptr); }
+
+ Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(nullptr);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
new file mode 100644
index 000000000000..3f1111976852
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -0,0 +1,386 @@
+//===--------------------- InterleavedAccessPass.cpp ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Interleaved Access pass, which identifies
+// interleaved memory accesses and transforms them into target specific
+// intrinsics.
+//
+// An interleaved load reads data from memory into several vectors,
+// DE-interleaving the data on a factor. An interleaved store RE-interleaves
+// the data on a factor and writes several vectors to memory.
+//
+// As interleaved accesses are difficult to identify in CodeGen (mainly
+// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
+// IR), we identify and transform them to intrinsics in this pass so the
+// intrinsics can be easily matched into target specific instructions later in
+// CodeGen.
+//
+// E.g. An interleaved load (Factor = 2):
+// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
+// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+//
+// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
+// intrinsic in ARM backend.
+//
+// E.g. An interleaved store (Factor = 3):
+// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+// store <12 x i32> %i.vec, <12 x i32>* %ptr
+//
+// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
+// intrinsic in ARM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "interleaved-access"
+
+static cl::opt<bool> LowerInterleavedAccesses(
+ "lower-interleaved-accesses",
+ cl::desc("Enable lowering interleaved accesses to intrinsics"),
+ cl::init(true), cl::Hidden);
+
+static unsigned MaxFactor; // The maximum supported interleave factor.
+
+namespace {
+
+class InterleavedAccess : public FunctionPass {
+
+public:
+ static char ID;
+ InterleavedAccess(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
+ initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
+ }
+
+ const char *getPassName() const override { return "Interleaved Access Pass"; }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+
+private:
+ DominatorTree *DT;
+ const TargetMachine *TM;
+ const TargetLowering *TLI;
+
+ /// \brief Transform an interleaved load into target specific intrinsics.
+ bool lowerInterleavedLoad(LoadInst *LI,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// \brief Transform an interleaved store into target specific intrinsics.
+ bool lowerInterleavedStore(StoreInst *SI,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// \brief Returns true if the uses of an interleaved load by the
+ /// extractelement instructions in \p Extracts can be replaced by uses of the
+ /// shufflevector instructions in \p Shuffles instead. If so, the necessary
+ /// replacements are also performed.
+ bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
+ ArrayRef<ShuffleVectorInst *> Shuffles);
+};
+} // end anonymous namespace.
+
+char InterleavedAccess::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(
+ InterleavedAccess, "interleaved-access",
+ "Lower interleaved memory accesses to target specific intrinsics", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_TM_PASS_END(
+ InterleavedAccess, "interleaved-access",
+ "Lower interleaved memory accesses to target specific intrinsics", false,
+ false)
+
+FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
+ return new InterleavedAccess(TM);
+}
+
+/// \brief Check if the mask is a DE-interleave mask of the given factor
+/// \p Factor like:
+/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
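+/// E.g. <1, 4, 7, 10> is a DE-interleave mask of Factor = 3 and Index = 1.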
+static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
+ unsigned &Index) {
+ // Check all potential start indices from 0 to (Factor - 1).
+ for (Index = 0; Index < Factor; Index++) {
+ unsigned i = 0;
+
+ // Check that elements are in ascending order by Factor. Ignore undef
+ // elements.
+ for (; i < Mask.size(); i++)
+ if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
+ break;
+
+ if (i == Mask.size())
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Check if the mask is a DE-interleave mask for an interleaved load.
+///
+/// E.g. DE-interleave masks (Factor = 2) could be:
+/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
+/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
+static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
+ unsigned &Index) {
+ if (Mask.size() < 2)
+ return false;
+
+ // Check potential Factors.
+ for (Factor = 2; Factor <= MaxFactor; Factor++)
+ if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
+ return true;
+
+ return false;
+}
+
+/// \brief Check if the mask is a RE-interleave mask for an interleaved store.
+///
+/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
+///
+/// E.g. The RE-interleave mask (Factor = 2) could be:
+/// <0, 4, 1, 5, 2, 6, 3, 7>
+static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
+ unsigned NumElts = Mask.size();
+ if (NumElts < 4)
+ return false;
+
+ // Check potential Factors.
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ if (NumElts % Factor)
+ continue;
+
+ unsigned NumSubElts = NumElts / Factor;
+ if (!isPowerOf2_32(NumSubElts))
+ continue;
+
+ // Check whether each element matches the RE-interleave rule. Ignore undef
+ // elements.
+ unsigned i = 0;
+ for (; i < NumElts; i++)
+ if (Mask[i] >= 0 &&
+ static_cast<unsigned>(Mask[i]) !=
+ (i % Factor) * NumSubElts + i / Factor)
+ break;
+
+ // Found a RE-interleave mask of the current factor.
+ if (i == NumElts)
+ return true;
+ }
+
+ return false;
+}
+
+bool InterleavedAccess::lowerInterleavedLoad(
+ LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!LI->isSimple())
+ return false;
+
+ SmallVector<ShuffleVectorInst *, 4> Shuffles;
+ SmallVector<ExtractElementInst *, 4> Extracts;
+
+ // Check if all users of this load are shufflevectors. If we encounter any
+ // users that are extractelement instructions, we save them to later check if
+ // they can be modified to extract from one of the shufflevectors instead of
+ // the load.
+ for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
+ auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+ if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
+ Extracts.push_back(Extract);
+ continue;
+ }
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
+ if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
+ return false;
+
+ Shuffles.push_back(SVI);
+ }
+
+ if (Shuffles.empty())
+ return false;
+
+ unsigned Factor, Index;
+
+ // Check if the first shufflevector is a DE-interleave shuffle.
+ if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index))
+ return false;
+
+ // Holds the corresponding index for each DE-interleave shuffle.
+ SmallVector<unsigned, 4> Indices;
+ Indices.push_back(Index);
+
+ Type *VecTy = Shuffles[0]->getType();
+
+ // Check if the other shufflevectors are also DE-interleave shuffles of the
+ // same type and factor as the first shufflevector.
+ for (unsigned i = 1; i < Shuffles.size(); i++) {
+ if (Shuffles[i]->getType() != VecTy)
+ return false;
+
+ if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
+ Index))
+ return false;
+
+ Indices.push_back(Index);
+ }
+
+ // Try and modify users of the load that are extractelement instructions to
+ // use the shufflevector instructions instead of the load.
+ if (!tryReplaceExtracts(Extracts, Shuffles))
+ return false;
+
+ DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+
+ // Try to create target specific intrinsics to replace the load and shuffles.
+ if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
+ return false;
+
+ for (auto SVI : Shuffles)
+ DeadInsts.push_back(SVI);
+
+ DeadInsts.push_back(LI);
+ return true;
+}
+
+bool InterleavedAccess::tryReplaceExtracts(
+ ArrayRef<ExtractElementInst *> Extracts,
+ ArrayRef<ShuffleVectorInst *> Shuffles) {
+
+ // If there aren't any extractelement instructions to modify, there's nothing
+ // to do.
+ if (Extracts.empty())
+ return true;
+
+ // Maps extractelement instructions to vector-index pairs. The extractelement
+ // instructions will be modified to use the new vector and index operands.
+ DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
+
+ for (auto *Extract : Extracts) {
+
+ // The vector index that is extracted.
+ auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
+ auto Index = IndexOperand->getSExtValue();
+
+ // Look for a suitable shufflevector instruction. The goal is to modify the
+ // extractelement instruction (which uses an interleaved load) to use one
+ // of the shufflevector instructions instead of the load.
+ for (auto *Shuffle : Shuffles) {
+
+ // If the shufflevector instruction doesn't dominate the extract, we
+ // can't create a use of it.
+ if (!DT->dominates(Shuffle, Extract))
+ continue;
+
+ // Inspect the indices of the shufflevector instruction. If the shuffle
+ // selects the same index that is extracted, we can modify the
+ // extractelement instruction.
+ SmallVector<int, 4> Indices;
+ Shuffle->getShuffleMask(Indices);
+ for (unsigned I = 0; I < Indices.size(); ++I)
+ if (Indices[I] == Index) {
+ assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
+ "Vector operations do not match");
+ ReplacementMap[Extract] = std::make_pair(Shuffle, I);
+ break;
+ }
+
+ // If we found a suitable shufflevector instruction, stop looking.
+ if (ReplacementMap.count(Extract))
+ break;
+ }
+
+ // If we did not find a suitable shufflevector instruction, the
+ // extractelement instruction cannot be modified, so we must give up.
+ if (!ReplacementMap.count(Extract))
+ return false;
+ }
+
+ // Finally, perform the replacements.
+ IRBuilder<> Builder(Extracts[0]->getContext());
+ for (auto &Replacement : ReplacementMap) {
+ auto *Extract = Replacement.first;
+ auto *Vector = Replacement.second.first;
+ auto Index = Replacement.second.second;
+ Builder.SetInsertPoint(Extract);
+ Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
+ Extract->eraseFromParent();
+ }
+
+ return true;
+}
+
+bool InterleavedAccess::lowerInterleavedStore(
+ StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!SI->isSimple())
+ return false;
+
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ if (!SVI || !SVI->hasOneUse())
+ return false;
+
+ // Check if the shufflevector is a RE-interleave shuffle.
+ unsigned Factor;
+ if (!isReInterleaveMask(SVI->getShuffleMask(), Factor))
+ return false;
+
+ DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+
+ // Try to create target specific intrinsics to replace the store and shuffle.
+ if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
+ return false;
+
+ // Already have a new target specific interleaved store. Erase the old store.
+ DeadInsts.push_back(SI);
+ DeadInsts.push_back(SVI);
+ return true;
+}
+
+bool InterleavedAccess::runOnFunction(Function &F) {
+ if (!TM || !LowerInterleavedAccesses)
+ return false;
+
+ DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
+
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ MaxFactor = TLI->getMaxSupportedInterleaveFactor();
+
+ // Holds dead instructions that will be erased later.
+ SmallVector<Instruction *, 32> DeadInsts;
+ bool Changed = false;
+
+ for (auto &I : instructions(F)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ Changed |= lowerInterleavedLoad(LI, DeadInsts);
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ Changed |= lowerInterleavedStore(SI, DeadInsts);
+ }
+
+ for (auto I : DeadInsts)
+ I->eraseFromParent();
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
new file mode 100644
index 000000000000..2962f8701625
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -0,0 +1,608 @@
+//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the IntrinsicLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // Insert a correctly-typed definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back(I->getType());
+ M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
+ const char *FName,
+ const char *DName, const char *LDName) {
+ // Insert definitions for all the floating point types.
+ switch((int)Fn.arg_begin()->getType()->getTypeID()) {
+ case Type::FloatTyID:
+ EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(),
+ Type::getFloatTy(M.getContext()));
+ break;
+ case Type::DoubleTyID:
+ EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(),
+ Type::getDoubleTy(M.getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(),
+ Fn.arg_begin()->getType());
+ break;
+ }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function and that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+ ArgIt ArgBegin, ArgIt ArgEnd,
+ Type *RetTy) {
+ // If we haven't already looked up this function, check to see if the
+ // program already contains a function with this name.
+ Module *M = CI->getModule();
+ // Get or insert the definition now.
+ std::vector<Type *> ParamTys;
+ for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+ ParamTys.push_back((*I)->getType());
+ Constant* FCache = M->getOrInsertFunction(NewFn,
+ FunctionType::get(RetTy, ParamTys, false));
+
+ IRBuilder<> Builder(CI->getParent(), CI->getIterator());
+ SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
+ NewCI->setName(CI->getName());
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+ LLVMContext &Context = M.getContext();
+ for (auto &F : M)
+ if (F.isDeclaration() && !F.use_empty())
+ switch (F.getIntrinsicID()) {
+ default: break;
+ case Intrinsic::setjmp:
+ EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(),
+ Type::getInt32Ty(M.getContext()));
+ break;
+ case Intrinsic::longjmp:
+ EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::siglongjmp:
+ EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(),
+ Type::getVoidTy(M.getContext()));
+ break;
+ case Intrinsic::memcpy:
+ M.getOrInsertFunction("memcpy",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ DL.getIntPtrType(Context), nullptr);
+ break;
+ case Intrinsic::memmove:
+ M.getOrInsertFunction("memmove",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ DL.getIntPtrType(Context), nullptr);
+ break;
+ case Intrinsic::memset:
+ M.getOrInsertFunction("memset",
+ Type::getInt8PtrTy(Context),
+ Type::getInt8PtrTy(Context),
+ Type::getInt32Ty(M.getContext()),
+ DL.getIntPtrType(Context), nullptr);
+ break;
+ case Intrinsic::sqrt:
+ EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
+ break;
+ case Intrinsic::sin:
+ EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl");
+ break;
+ case Intrinsic::cos:
+ EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl");
+ break;
+ case Intrinsic::pow:
+ EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl");
+ break;
+ case Intrinsic::log:
+ EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl");
+ break;
+ case Intrinsic::log2:
+ EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l");
+ break;
+ case Intrinsic::log10:
+ EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l");
+ break;
+ case Intrinsic::exp:
+ EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl");
+ break;
+ case Intrinsic::exp2:
+ EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l");
+ break;
+ }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!");
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+
+ IRBuilder<> Builder(IP);
+
+ switch(BitSize) {
+ default: llvm_unreachable("Unhandled type size of value to byteswap!");
+ case 16: {
+ Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.1");
+ V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16");
+ break;
+ }
+ case 32: {
+ Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.4");
+ Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.3");
+ Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.2");
+ Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24),
+ "bswap.1");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt32Ty(Context), 0xFF00),
+ "bswap.and2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2");
+ V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32");
+ break;
+ }
+ case 64: {
+ Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56),
+ "bswap.8");
+ Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40),
+ "bswap.7");
+ Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24),
+ "bswap.6");
+ Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8),
+ "bswap.5");
+ Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8),
+ "bswap.4");
+ Value* Tmp3 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 24),
+ "bswap.3");
+ Value* Tmp2 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 40),
+ "bswap.2");
+ Value* Tmp1 = Builder.CreateLShr(V,
+ ConstantInt::get(V->getType(), 56),
+ "bswap.1");
+ Tmp7 = Builder.CreateAnd(Tmp7,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000000000ULL),
+ "bswap.and7");
+ Tmp6 = Builder.CreateAnd(Tmp6,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000000000ULL),
+ "bswap.and6");
+ Tmp5 = Builder.CreateAnd(Tmp5,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00000000ULL),
+ "bswap.and5");
+ Tmp4 = Builder.CreateAnd(Tmp4,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF000000ULL),
+ "bswap.and4");
+ Tmp3 = Builder.CreateAnd(Tmp3,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF0000ULL),
+ "bswap.and3");
+ Tmp2 = Builder.CreateAnd(Tmp2,
+ ConstantInt::get(Type::getInt64Ty(Context),
+ 0xFF00ULL),
+ "bswap.and2");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1");
+ Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3");
+ Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4");
+ Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5");
+ Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6");
+ V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64");
+ break;
+ }
+ }
+ return V;
+}
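+
+// Editorial note (not part of the upstream source): for the 32-bit case the IR
+// built above is equivalent to the following C++ expression on a uint32_t x:
+//
+//   uint32_t swapped = (x << 24)                |
+//                      ((x << 8) & 0x00FF0000u) |
+//                      ((x >> 8) & 0x0000FF00u) |
+//                      (x >> 24);
+//
+// The 16- and 64-bit cases follow the same shift/mask pattern with two and
+// eight byte lanes respectively.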
+
+/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
+/// instruction IP.
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) {
+ assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!");
+
+ static const uint64_t MaskValues[6] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL,
+ 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
+ 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
+ };
+
+ IRBuilder<> Builder(IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ unsigned WordSize = (BitSize + 63) / 64;
+ Value *Count = ConstantInt::get(V->getType(), 0);
+
+ for (unsigned n = 0; n < WordSize; ++n) {
+ Value *PartValue = V;
+ for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
+ i <<= 1, ++ct) {
+ Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
+ Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "ctpop.and1");
+ Value *VShift = Builder.CreateLShr(PartValue,
+ ConstantInt::get(V->getType(), i),
+ "ctpop.sh");
+ Value *RHS = Builder.CreateAnd(VShift, MaskCst, "ctpop.and2");
+ PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step");
+ }
+ Count = Builder.CreateAdd(PartValue, Count, "ctpop.part");
+ if (BitSize > 64) {
+ V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64),
+ "ctpop.part.sh");
+ BitSize -= 64;
+ }
+ }
+
+ return Count;
+}
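+
+// Editorial note (not part of the upstream source): per 64-bit word this is the
+// classic SWAR population count. In C++ terms, for a uint64_t x:
+//
+//   for (unsigned i = 1, ct = 0; i < 64; i <<= 1, ++ct)
+//     x = (x & MaskValues[ct]) + ((x >> i) & MaskValues[ct]);
+//
+// Adjacent 1-bit fields are summed into 2-bit fields, those into 4-bit fields,
+// and so on until the whole word holds the count. Integers wider than 64 bits
+// are processed 64 bits at a time and the partial counts accumulate in Count.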
+
+/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
+/// instruction IP.
+static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
+
+ IRBuilder<> Builder(IP);
+
+ unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
+ for (unsigned i = 1; i < BitSize; i <<= 1) {
+ Value *ShVal = ConstantInt::get(V->getType(), i);
+ ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh");
+ V = Builder.CreateOr(V, ShVal, "ctlz.step");
+ }
+
+ V = Builder.CreateNot(V);
+ return LowerCTPOP(Context, V, IP);
+}
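+
+// Editorial note (not part of the upstream source): the loop above smears the
+// highest set bit into every lower position, i.e. for a 32-bit value:
+//
+//   x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16;
+//
+// After smearing, the complement has ones only above the original leading bit,
+// so ctlz(V) == ctpop(~smear(V)), which is what the final two lines compute.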
+
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+ const char *Dname,
+ const char *LDname) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default: llvm_unreachable("Invalid type in intrinsic");
+ case Type::FloatTyID:
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ break;
+ case Type::DoubleTyID:
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
+ Type::getDoubleTy(CI->getContext()));
+ break;
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
+ break;
+ }
+}
+
+void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
+ IRBuilder<> Builder(CI);
+ LLVMContext &Context = CI->getContext();
+
+ const Function *Callee = CI->getCalledFunction();
+ assert(Callee && "Cannot lower an indirect call!");
+
+ CallSite CS(CI);
+ switch (Callee->getIntrinsicID()) {
+ case Intrinsic::not_intrinsic:
+ report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
+ Callee->getName() + "'!");
+ default:
+ report_fatal_error("Code generator does not support intrinsic function '"+
+ Callee->getName()+"'!");
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
+ // The setjmp/longjmp intrinsics should only exist in the code if it was
+ // never optimized (i.e., straight out of the C front end), or if it has been hacked on
+ // by the lowerinvoke pass. In both cases, the right thing to do is to
+ // convert the call to an explicit setjmp or longjmp call.
+ case Intrinsic::setjmp: {
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getInt32Ty(Context));
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+ case Intrinsic::sigsetjmp:
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+
+ case Intrinsic::longjmp: {
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+
+ case Intrinsic::siglongjmp: {
+ // Insert the call to abort
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
+ Type::getVoidTy(Context));
+ break;
+ }
+ case Intrinsic::ctpop:
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::bswap:
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::ctlz:
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
+ break;
+
+ case Intrinsic::cttz: {
+ // cttz(x) -> ctpop(~X & (X-1))
+ Value *Src = CI->getArgOperand(0);
+ Value *NotSrc = Builder.CreateNot(Src);
+ NotSrc->setName(Src->getName() + ".not");
+ Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
+ SrcM1 = Builder.CreateSub(Src, SrcM1);
+ Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
+ CI->replaceAllUsesWith(Src);
+ break;
+ }
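+
+ // Editorial note (not part of the upstream source): worked example of the
+ // identity above: for x = 0b...01000, x - 1 = 0b...00111 and
+ // ~x & (x - 1) = 0b...00111, whose population count (3) equals the number of
+ // trailing zeros in x. For x == 0 the mask is all ones and the result is the
+ // bit width, matching cttz(0) when zero is not treated as undefined.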
+
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore: {
+ if (!Warned)
+ errs() << "WARNING: this target does not support the llvm.stack"
+ << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
+ "save" : "restore") << " intrinsic.\n";
+ Warned = true;
+ if (Callee->getIntrinsicID() == Intrinsic::stacksave)
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ break;
+ }
+
+ case Intrinsic::get_dynamic_area_offset:
+ errs() << "WARNING: this target does not support the custom llvm.get."
+ "dynamic.area.offset. It is being lowered to a constant 0\n";
+ // Just lower it to a constant 0 because for most targets
+ // @llvm.get.dynamic.area.offset is lowered to zero.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0));
+ break;
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ errs() << "WARNING: this target does not support the llvm."
+ << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
+ "return" : "frame") << "address intrinsic.\n";
+ CI->replaceAllUsesWith(ConstantPointerNull::get(
+ cast<PointerType>(CI->getType())));
+ break;
+
+ case Intrinsic::prefetch:
+ break; // Simply strip out prefetches on unsupported architectures
+
+ case Intrinsic::pcmarker:
+ break; // Simply strip out pcmarker on unsupported architectures
+ case Intrinsic::readcyclecounter: {
+ errs() << "WARNING: this target does not support the llvm.readcyclecoun"
+ << "ter intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
+
+ case Intrinsic::dbg_declare:
+ break; // Simply strip out debugging intrinsics
+
+ case Intrinsic::eh_typeid_for:
+ // Return something different from eh_selector.
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Just drop the annotation, but forward the value
+ CI->replaceAllUsesWith(CI->getOperand(0));
+ break;
+
+ case Intrinsic::assume:
+ case Intrinsic::var_annotation:
+ break; // Strip out these intrinsics
+
+ case Intrinsic::memcpy: {
+ Type *IntPtr = DL.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memmove: {
+ Type *IntPtr = DL.getIntPtrType(Context);
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
+ Ops[2] = Size;
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::memset: {
+ Value *Op0 = CI->getArgOperand(0);
+ Type *IntPtr = DL.getIntPtrType(Op0->getType());
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
+ /* isSigned */ false);
+ Value *Ops[3];
+ Ops[0] = Op0;
+ // Extend the i8 fill-value operand to the i32 that the memset() libcall expects.
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
+ /* isSigned */ false);
+ Ops[2] = Size;
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
+ break;
+ }
+ case Intrinsic::sqrt: {
+ ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
+ break;
+ }
+ case Intrinsic::log: {
+ ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
+ break;
+ }
+ case Intrinsic::log2: {
+ ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
+ break;
+ }
+ case Intrinsic::log10: {
+ ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
+ break;
+ }
+ case Intrinsic::exp: {
+ ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
+ break;
+ }
+ case Intrinsic::exp2: {
+ ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
+ break;
+ }
+ case Intrinsic::pow: {
+ ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
+ break;
+ }
+ case Intrinsic::sin: {
+ ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl");
+ break;
+ }
+ case Intrinsic::cos: {
+ ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl");
+ break;
+ }
+ case Intrinsic::floor: {
+ ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl");
+ break;
+ }
+ case Intrinsic::ceil: {
+ ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill");
+ break;
+ }
+ case Intrinsic::trunc: {
+ ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl");
+ break;
+ }
+ case Intrinsic::round: {
+ ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl");
+ break;
+ }
+ case Intrinsic::copysign: {
+ ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
+ break;
+ }
+ case Intrinsic::flt_rounds:
+ // Lower to "round to the nearest"
+ if (!CI->getType()->isVoidTy())
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
+ break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ Module *M = CI->getModule();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
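+
+// Editorial note (not part of the upstream source): LowerToByteSwap works in
+// the opposite direction from LowerBSWAP above. Given a call whose result and
+// single argument share an integer type (for instance a hypothetical
+// "call i32 @byteswap32(i32 %x)" that a target chooses to treat as a byte
+// swap), it rewrites the call into the llvm.bswap intrinsic so the backend can
+// select a native byte-swap instruction.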
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
new file mode 100644
index 000000000000..9eb43d2bec10
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -0,0 +1,311 @@
+//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVMTargetMachine class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
+
+static cl::opt<bool>
+ EnableGlobalISel("global-isel", cl::Hidden, cl::init(false),
+ cl::desc("Enable the \"global\" instruction selector"));
+
+void LLVMTargetMachine::initAsmInfo() {
+ MRI = TheTarget.createMCRegInfo(getTargetTriple().str());
+ MII = TheTarget.createMCInstrInfo();
+ // FIXME: Having an MCSubtargetInfo on the target machine is a hack due
+ // to some backends having subtarget feature dependent module level
+ // code generation. This is similar to the hack in the AsmPrinter for
+ // module level assembly etc.
+ STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(),
+ getTargetFeatureString());
+
+ MCAsmInfo *TmpAsmInfo =
+ TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
+ // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
+ // and if the old one gets included then MCAsmInfo will be NULL and
+ // we'll crash later.
+ // Provide the user with a useful error message about what's wrong.
+ assert(TmpAsmInfo && "MCAsmInfo not initialized. "
+ "Make sure you include the correct TargetSelect.h"
+ "and that InitializeAllTargetMCs() is being invoked!");
+
+ if (Options.DisableIntegratedAS)
+ TmpAsmInfo->setUseIntegratedAssembler(false);
+
+ TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments);
+
+ if (Options.CompressDebugSections)
+ TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu);
+
+ TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations);
+
+ if (Options.ExceptionModel != ExceptionHandling::None)
+ TmpAsmInfo->setExceptionsType(Options.ExceptionModel);
+
+ AsmInfo = TmpAsmInfo;
+}
+
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ StringRef DataLayoutString,
+ const Triple &TT, StringRef CPU,
+ StringRef FS, TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
+ T.adjustCodeGenOpts(TT, RM, CM);
+ this->RM = RM;
+ this->CMModel = CM;
+ this->OptLevel = OL;
+}
+
+TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(BasicTTIImpl(this, F));
+ });
+}
+
+MachineModuleInfo &
+LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const {
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getMCRegisterInfo(),
+ getObjFileLowering());
+ PM.add(MMI);
+ return *MMI;
+}
+
+void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM,
+ MachineFunctionInitializer *MFInitializer) const {
+ PM.add(new MachineFunctionAnalysis(*this, MFInitializer));
+}
+
+/// This addPassesTo* helper drives creation and initialization of TargetPassConfig.
+static MCContext *
+addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
+ bool DisableVerify, AnalysisID StartBefore,
+ AnalysisID StartAfter, AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer = nullptr) {
+
+ // When in emulated TLS mode, add the LowerEmuTLS pass.
+ if (TM->Options.EmulatedTLS)
+ PM.add(createLowerEmuTLSPass(TM));
+
+ PM.add(createPreISelIntrinsicLoweringPass());
+
+ // Add internal analysis passes from the target machine.
+ PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+
+ // Targets may override createPassConfig to provide a target-specific
+ // subclass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+ PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ PassConfig->addCodeGenPrepare();
+
+ PassConfig->addPassesToHandleExceptions();
+
+ PassConfig->addISelPrepare();
+
+ MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM);
+ TM->addMachineFunctionAnalysis(PM, MFInitializer);
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ TM->getO0WantsFastISel()))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (LLVM_UNLIKELY(EnableGlobalISel)) {
+ if (PassConfig->addIRTranslator())
+ return nullptr;
+
+ // Before running the register bank selector, ask the target if it
+ // wants to run some passes.
+ PassConfig->addPreRegBankSelect();
+
+ if (PassConfig->addRegBankSelect())
+ return nullptr;
+
+ } else if (PassConfig->addInstSelector())
+ return nullptr;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return &MMI.getContext();
+}
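+
+// Editorial note (not part of the upstream source): the pipeline assembled
+// above runs, in order, roughly:
+//
+//   LowerEmuTLS (emulated TLS only) -> PreISelIntrinsicLowering ->
+//   target IR passes (addIRPasses) -> CodeGenPrepare ->
+//   exception-handling preparation -> ISel preparation ->
+//   instruction selection (GlobalISel with -global-isel, otherwise the
+//   SelectionDAG/FastISel selector) -> machine passes (addMachinePasses)
+//
+// A null return tells the callers below that instruction selection could not
+// be set up for this target.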
+
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
+ AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) {
+ // Add common CodeGen passes.
+ MCContext *Context =
+ addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
+ StopAfter, MFInitializer);
+ if (!Context)
+ return true;
+
+ if (StopAfter) {
+ PM.add(createPrintMIRPass(Out));
+ return false;
+ }
+
+ if (Options.MCOptions.MCSaveTempLabels)
+ Context->setAllowTemporaryLabels(false);
+
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ const MCInstrInfo &MII = *getMCInstrInfo();
+
+ std::unique_ptr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
+ getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = nullptr;
+ if (Options.MCOptions.ShowMCEncoding)
+ MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
+
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
+ MCStreamer *S = getTarget().createAsmStreamer(
+ *Context, std::move(FOut), Options.MCOptions.AsmVerbose,
+ Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
+ Options.MCOptions.ShowMCInst);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ if (!MCE || !MAB)
+ return true;
+
+ // Don't waste memory on names of temp labels.
+ Context->setUseNamesOnTempLabels(false);
+
+ Triple T(getTargetTriple().str());
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(
+ T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ true));
+ break;
+ }
+ case CGFT_Null:
+ // The Null output is intended for performance analysis and testing,
+ // not for real users.
+ AsmStreamer.reset(getTarget().createNullStreamer(*Context));
+ break;
+ }
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+
+ return false;
+}
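+
+// Editorial note (not part of the upstream source): a minimal sketch of how a
+// client might drive this method, assuming the declaration in TargetMachine.h
+// provides defaults for the optional start/stop-pass arguments:
+//
+//   legacy::PassManager PM;
+//   std::error_code EC;
+//   raw_fd_ostream OS("out.o", EC, sys::fs::F_None);    // hypothetical output
+//   if (TM->addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
+//                               /*DisableVerify=*/true))
+//     report_fatal_error("target does not support object-file emission");
+//   PM.run(M);                                     // M is the llvm::Module
+//
+// The boolean return follows the convention above: true means failure.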
+
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer which can be
+/// used to build a custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
+ raw_pwrite_stream &Out,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr,
+ nullptr);
+ if (!Ctx)
+ return true;
+
+ if (Options.MCOptions.MCSaveTempLabels)
+ Ctx->setAllowTemporaryLabels(false);
+
+ // Create the code emitter for the target if it exists. If not, .o file
+ // emission fails.
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ MCCodeEmitter *MCE =
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
+ MCAsmBackend *MAB =
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ if (!MCE || !MAB)
+ return true;
+
+ const Triple &T = getTargetTriple();
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
+ std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
+ T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ Options.MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ true));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+
+ return false; // success!
+}
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
new file mode 100644
index 000000000000..43218492ed1c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -0,0 +1,140 @@
+//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "scheduler"
+
+bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return RHSNum < LHSNum;
+}
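+
+// Editorial note (not part of the upstream source): read as a priority rule,
+// the comparator above prefers, in decreasing order of importance:
+//   1. nodes flagged isScheduleHigh,
+//   2. nodes with the larger critical-path latency,
+//   3. nodes that are the sole blocker for more successors,
+//   4. the node with the smaller NodeNum (a stable tie-break).
+// Returning true means LHS should rank below RHS, which is the convention the
+// pop() scan at the bottom of this file relies on.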
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = nullptr;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return nullptr;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+
+ return OnlyAvailablePred;
+}
+
+void LatencyPriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+ }
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+
+ Queue.push_back(SU);
+}
+
+
+// scheduledNode - As nodes are scheduled, we look to see if there are any
+// successor nodes that have a single unscheduled predecessor. If so, that
+// single predecessor has a higher priority, since scheduling it will make
+// the node available.
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ AdjustPriorityOfUnscheduledPreds(I->getSUnit());
+ }
+}
+
+/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return nullptr;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != std::prev(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
new file mode 100644
index 000000000000..b810176e6a18
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -0,0 +1,332 @@
+//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LexicalScopes analysis.
+//
+// This pass collects lexical scope information and maps machine instructions
+// to respective lexical scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "lexicalscopes"
+
+/// reset - Reset the instance so that it's prepared for another function.
+void LexicalScopes::reset() {
+ MF = nullptr;
+ CurrentFnLexicalScope = nullptr;
+ LexicalScopeMap.clear();
+ AbstractScopeMap.clear();
+ InlinedLexicalScopeMap.clear();
+ AbstractScopesList.clear();
+}
+
+/// initialize - Scan the machine function and construct the lexical scope nest.
+void LexicalScopes::initialize(const MachineFunction &Fn) {
+ reset();
+ MF = &Fn;
+ SmallVector<InsnRange, 4> MIRanges;
+ DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
+ extractLexicalScopes(MIRanges, MI2ScopeMap);
+ if (CurrentFnLexicalScope) {
+ constructScopeNest(CurrentFnLexicalScope);
+ assignInstructionRanges(MIRanges, MI2ScopeMap);
+ }
+}
+
+/// extractLexicalScopes - Extract instruction ranges for each lexical scope
+/// for the given machine function.
+void LexicalScopes::extractLexicalScopes(
+ SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ // Scan each instruction and create scopes. First build a working set of scopes.
+ for (const auto &MBB : *MF) {
+ const MachineInstr *RangeBeginMI = nullptr;
+ const MachineInstr *PrevMI = nullptr;
+ const DILocation *PrevDL = nullptr;
+ for (const auto &MInsn : MBB) {
+ // Check if instruction has valid location information.
+ const DILocation *MIDL = MInsn.getDebugLoc();
+ if (!MIDL) {
+ PrevMI = &MInsn;
+ continue;
+ }
+
+ // If scope has not changed then skip this instruction.
+ if (MIDL == PrevDL) {
+ PrevMI = &MInsn;
+ continue;
+ }
+
+ // Ignore DBG_VALUE. It does not contribute to any instruction in output.
+ if (MInsn.isDebugValue())
+ continue;
+
+ if (RangeBeginMI) {
+ // If we have already seen the beginning of an instruction range and the
+ // current instruction's scope does not match the scope of the first
+ // instruction in that range, close the range here and record it.
+ InsnRange R(RangeBeginMI, PrevMI);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ MIRanges.push_back(R);
+ }
+
+ // This is a beginning of a new instruction range.
+ RangeBeginMI = &MInsn;
+
+ // Reset previous markers.
+ PrevMI = &MInsn;
+ PrevDL = MIDL;
+ }
+
+ // Create last instruction range.
+ if (RangeBeginMI && PrevMI && PrevDL) {
+ InsnRange R(RangeBeginMI, PrevMI);
+ MIRanges.push_back(R);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL);
+ }
+ }
+}
+
+/// findLexicalScope - Find lexical scope, either regular or inlined, for the
+/// given DebugLoc. Return NULL if not found.
+LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
+ DILocalScope *Scope = DL->getScope();
+ if (!Scope)
+ return nullptr;
+
+ // The scope that we were created with could have an extra file, which
+ // isn't what we care about in this case.
+ Scope = Scope->getNonLexicalBlockFileScope();
+
+ if (auto *IA = DL->getInlinedAt()) {
+ auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
+ return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
+ }
+ return findLexicalScope(Scope);
+}
+
+/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
+/// not available then create new lexical scope.
+LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
+ const DILocation *IA) {
+ if (IA) {
+ // Create an abstract scope for inlined function.
+ getOrCreateAbstractScope(Scope);
+ // Create an inlined scope for inlined function.
+ return getOrCreateInlinedScope(Scope, IA);
+ }
+
+ return getOrCreateRegularScope(Scope);
+}
+
+/// getOrCreateRegularScope - Find or create a regular lexical scope.
+LexicalScope *
+LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
+
+ auto I = LexicalScopeMap.find(Scope);
+ if (I != LexicalScopeMap.end())
+ return &I->second;
+
+ // FIXME: Should the following dyn_cast be DILexicalBlock?
+ LexicalScope *Parent = nullptr;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateLexicalScope(Block->getScope());
+ I = LexicalScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, Scope, nullptr,
+ false)).first;
+
+ if (!Parent) {
+ assert(cast<DISubprogram>(Scope)->describes(MF->getFunction()));
+ assert(!CurrentFnLexicalScope);
+ CurrentFnLexicalScope = &I->second;
+ }
+
+ return &I->second;
+}
+
+/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
+LexicalScope *
+LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
+ const DILocation *InlinedAt) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
+ std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt);
+ auto I = InlinedLexicalScopeMap.find(P);
+ if (I != InlinedLexicalScopeMap.end())
+ return &I->second;
+
+ LexicalScope *Parent;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt);
+ else
+ Parent = getOrCreateLexicalScope(InlinedAt);
+
+ I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope,
+ InlinedAt, false))
+ .first;
+ return &I->second;
+}
+
+/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
+LexicalScope *
+LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) {
+ assert(Scope && "Invalid Scope encoding!");
+ Scope = Scope->getNonLexicalBlockFileScope();
+ auto I = AbstractScopeMap.find(Scope);
+ if (I != AbstractScopeMap.end())
+ return &I->second;
+
+ // FIXME: Should the following isa be DILexicalBlock?
+ LexicalScope *Parent = nullptr;
+ if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope))
+ Parent = getOrCreateAbstractScope(Block->getScope());
+
+ I = AbstractScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, Scope,
+ nullptr, true)).first;
+ if (isa<DISubprogram>(Scope))
+ AbstractScopesList.push_back(&I->second);
+ return &I->second;
+}
+
+/// constructScopeNest
+void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
+ assert(Scope && "Unable to calculate scope dominance graph!");
+ SmallVector<LexicalScope *, 4> WorkStack;
+ WorkStack.push_back(Scope);
+ unsigned Counter = 0;
+ while (!WorkStack.empty()) {
+ LexicalScope *WS = WorkStack.back();
+ const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
+ bool visitedChildren = false;
+ for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
+ SE = Children.end();
+ SI != SE; ++SI) {
+ LexicalScope *ChildScope = *SI;
+ if (!ChildScope->getDFSOut()) {
+ WorkStack.push_back(ChildScope);
+ visitedChildren = true;
+ ChildScope->setDFSIn(++Counter);
+ break;
+ }
+ }
+ if (!visitedChildren) {
+ WorkStack.pop_back();
+ WS->setDFSOut(++Counter);
+ }
+ }
+}
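+
+// Editorial note (not part of the upstream source): the walk above is an
+// iterative DFS that numbers the scope tree so a parent's [DFSIn, DFSOut]
+// interval encloses those of its children. For a function scope F with two
+// lexical blocks A and B it assigns, for example:
+//
+//   A: DFSIn = 1, DFSOut = 2;  B: DFSIn = 3, DFSOut = 4;  F: DFSOut = 5
+//
+// (F keeps its default DFSIn of 0.) LexicalScope::dominates can then answer
+// nesting queries by comparing these interval endpoints instead of walking
+// parent links.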
+
+/// assignInstructionRanges - Find ranges of instructions covered by each
+/// lexical scope.
+void LexicalScopes::assignInstructionRanges(
+ SmallVectorImpl<InsnRange> &MIRanges,
+ DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
+ LexicalScope *PrevLexicalScope = nullptr;
+ for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
+ RE = MIRanges.end();
+ RI != RE; ++RI) {
+ const InsnRange &R = *RI;
+ LexicalScope *S = MI2ScopeMap.lookup(R.first);
+ assert(S && "Lost LexicalScope for a machine instruction!");
+ if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
+ PrevLexicalScope->closeInsnRange(S);
+ S->openInsnRange(R.first);
+ S->extendInsnRange(R.second);
+ PrevLexicalScope = S;
+ }
+
+ if (PrevLexicalScope)
+ PrevLexicalScope->closeInsnRange();
+}
+
+/// getMachineBasicBlocks - Populate given set using machine basic blocks which
+/// have machine instructions that belong to lexical scope identified by
+/// DebugLoc.
+void LexicalScopes::getMachineBasicBlocks(
+ const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) {
+ MBBs.clear();
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return;
+
+ if (Scope == CurrentFnLexicalScope) {
+ for (const auto &MBB : *MF)
+ MBBs.insert(&MBB);
+ return;
+ }
+
+ SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges();
+ for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(),
+ E = InsnRanges.end();
+ I != E; ++I) {
+ InsnRange &R = *I;
+ MBBs.insert(R.first->getParent());
+ }
+}
+
+/// dominates - Return true if DebugLoc's lexical scope dominates at least one
+/// machine instruction's lexical scope in a given machine basic block.
+bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
+ LexicalScope *Scope = getOrCreateLexicalScope(DL);
+ if (!Scope)
+ return false;
+
+ // Current function scope covers all basic blocks in the function.
+ if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF)
+ return true;
+
+ bool Result = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ if (const DILocation *IDL = I->getDebugLoc())
+ if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
+ if (Scope->dominates(IScope))
+ return true;
+ }
+ return Result;
+}
+
+/// dump - Print data structures.
+void LexicalScope::dump(unsigned Indent) const {
+#ifndef NDEBUG
+ raw_ostream &err = dbgs();
+ err.indent(Indent);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
+ const MDNode *N = Desc;
+ err.indent(Indent);
+ N->dump();
+ if (AbstractScope)
+ err << std::string(Indent, ' ') << "Abstract Scope\n";
+
+ if (!Children.empty())
+ err << std::string(Indent + 2, ' ') << "Children ...\n";
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ if (Children[i] != this)
+ Children[i]->dump(Indent + 2);
+#endif
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
new file mode 100644
index 000000000000..4ff88d528108
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -0,0 +1,516 @@
+//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass implements a data flow analysis that propagates debug location
+/// information by inserting additional DBG_VALUE instructions into the machine
+/// instruction stream. The pass internally builds debug location liveness
+/// ranges to determine the points where additional DBG_VALUEs need to be
+/// inserted.
+///
+/// This is a separate pass from DbgValueHistoryCalculator to facilitate
+/// testing and improve modularity.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "live-debug-values"
+
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+
+namespace {
+
+// \brief If @MI is a DBG_VALUE whose debug value is described by a defined
+// register, returns the number of that register; otherwise returns 0.
+static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
+ assert(MI.isDebugValue() && "expected a DBG_VALUE");
+ assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+ // If the location of the variable is described using a register (directly
+ // or indirectly), that register is always the first operand.
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+class LiveDebugValues : public MachineFunctionPass {
+
+private:
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ /// Based on std::pair so it can be used as an index into a DenseMap.
+ typedef std::pair<const DILocalVariable *, const DILocation *>
+ DebugVariableBase;
+ /// A potentially inlined instance of a variable.
+ struct DebugVariable : public DebugVariableBase {
+ DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
+ : DebugVariableBase(Var, InlinedAt) {}
+
+ const DILocalVariable *getVar() const { return this->first; };
+ const DILocation *getInlinedAt() const { return this->second; };
+
+ bool operator<(const DebugVariable &DV) const {
+ if (getVar() == DV.getVar())
+ return getInlinedAt() < DV.getInlinedAt();
+ return getVar() < DV.getVar();
+ }
+ };
+
+ /// A pair of debug variable and value location.
+ struct VarLoc {
+ const DebugVariable Var;
+ const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
+
+ enum { InvalidKind = 0, RegisterKind } Kind;
+
+ /// The value location. Stored separately to avoid repeatedly
+ /// extracting it from MI.
+ union {
+ struct {
+ uint32_t RegNo;
+ uint32_t Offset;
+ } RegisterLoc;
+ uint64_t Hash;
+ } Loc;
+
+ VarLoc(const MachineInstr &MI)
+ : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
+ Kind(InvalidKind) {
+ static_assert((sizeof(Loc) == sizeof(uint64_t)),
+ "hash does not cover all members of Loc");
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+ if (int RegNo = isDbgValueDescribedByReg(MI)) {
+ Kind = RegisterKind;
+ Loc.RegisterLoc.RegNo = RegNo;
+ uint64_t Offset =
+ MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
+ // We don't support offsets larger than 4GiB here. They are
+ // slated to be replaced with DIExpressions anyway.
+ if (Offset >= (1ULL << 32))
+ Kind = InvalidKind;
+ else
+ Loc.RegisterLoc.Offset = Offset;
+ }
+ }
+
+ /// If this variable is described by a register, return it,
+ /// otherwise return 0.
+ unsigned isDescribedByReg() const {
+ if (Kind == RegisterKind)
+ return Loc.RegisterLoc.RegNo;
+ return 0;
+ }
+
+ void dump() const { MI.dump(); }
+
+ bool operator==(const VarLoc &Other) const {
+ return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
+ }
+
+ /// This operator guarantees that VarLocs are sorted by Variable first.
+ bool operator<(const VarLoc &Other) const {
+ if (Var == Other.Var)
+ return Loc.Hash < Other.Loc.Hash;
+ return Var < Other.Var;
+ }
+ };
+
+ typedef UniqueVector<VarLoc> VarLocMap;
+ typedef SparseBitVector<> VarLocSet;
+ typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+
+ /// This holds the working set of currently open ranges. For fast
+ /// access, this is done both as a set of VarLocIDs, and a map of
+ /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
+ /// previous open ranges for the same variable.
+ class OpenRangesSet {
+ VarLocSet VarLocs;
+ SmallDenseMap<DebugVariableBase, unsigned, 8> Vars;
+
+ public:
+ const VarLocSet &getVarLocs() const { return VarLocs; }
+
+ /// Terminate all open ranges for Var by removing it from the set.
+ void erase(DebugVariable Var) {
+ auto It = Vars.find(Var);
+ if (It != Vars.end()) {
+ unsigned ID = It->second;
+ VarLocs.reset(ID);
+ Vars.erase(It);
+ }
+ }
+
+ /// Terminate all open ranges listed in \c KillSet by removing
+ /// them from the set.
+ void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) {
+ VarLocs.intersectWithComplement(KillSet);
+ for (unsigned ID : KillSet)
+ Vars.erase(VarLocIDs[ID].Var);
+ }
+
+ /// Insert a new range into the set.
+ void insert(unsigned VarLocID, DebugVariableBase Var) {
+ VarLocs.set(VarLocID);
+ Vars.insert({Var, VarLocID});
+ }
+
+ /// Empty the set.
+ void clear() {
+ VarLocs.clear();
+ Vars.clear();
+ }
+
+ /// Return whether the set is empty or not.
+ bool empty() const {
+ assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent");
+ return VarLocs.empty();
+ }
+ };
+
+ void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs);
+ void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ const VarLocMap &VarLocIDs);
+ bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+ bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
+
+ bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
+ const VarLocMap &VarLocIDs);
+
+ bool ExtendRanges(MachineFunction &MF);
+
+public:
+ static char ID;
+
+ /// Default construct and initialize the pass.
+ LiveDebugValues();
+
+ /// Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ /// Print to ostream with a message.
+ void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V,
+ const VarLocMap &VarLocIDs, const char *msg,
+ raw_ostream &Out) const;
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char LiveDebugValues::ID = 0;
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+ false, false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+ initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
+ const VarLocInMBB &V,
+ const VarLocMap &VarLocIDs,
+ const char *msg,
+ raw_ostream &Out) const {
+ for (const MachineBasicBlock &BB : MF) {
+ const auto &L = V.lookup(&BB);
+ Out << "MBB: " << BB.getName() << ":\n";
+ for (unsigned VLL : L) {
+ const VarLoc &VL = VarLocIDs[VLL];
+ Out << " Var: " << VL.Var.getVar()->getName();
+ Out << " MI: ";
+ VL.dump();
+ Out << "\n";
+ }
+ }
+ Out << "\n";
+}
+
+/// End all previous ranges related to @MI and start a new range from @MI
+/// if it is a DBG_VALUE instr.
+void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
+ if (!MI.isDebugValue())
+ return;
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ // End all previous ranges of Var.
+ DebugVariable V(Var, InlinedAt);
+ OpenRanges.erase(V);
+
+ // Add the VarLoc to OpenRanges from this DBG_VALUE.
+ // TODO: Currently handles DBG_VALUE which has only reg as location.
+ if (isDbgValueDescribedByReg(MI)) {
+ VarLoc VL(MI);
+ unsigned ID = VarLocIDs.insert(VL);
+ OpenRanges.insert(ID, VL.Var);
+ }
+}
+
+/// A definition of a register may mark the end of a range.
+void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ const VarLocMap &VarLocIDs) {
+ MachineFunction *MF = MI.getParent()->getParent();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ SparseBitVector<> KillSet;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ TRI->isPhysicalRegister(MO.getReg())) {
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ for (unsigned ID : OpenRanges.getVarLocs())
+ if (VarLocIDs[ID].isDescribedByReg() == *RAI)
+ KillSet.set(ID);
+ } else if (MO.isRegMask()) {
+ // Remove ranges of all clobbered registers. Register masks don't usually
+ // list SP as preserved. While the debug info may be off for an
+ // instruction or two around callee-cleanup calls, transferring the
+ // DEBUG_VALUE across the call is still a better user experience.
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ unsigned Reg = VarLocIDs[ID].isDescribedByReg();
+ if (Reg && Reg != SP && MO.clobbersPhysReg(Reg))
+ KillSet.set(ID);
+ }
+ }
+ }
+ OpenRanges.erase(KillSet, VarLocIDs);
+}
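+
+// Editorial note (not part of the upstream source): as an example of the alias
+// handling above, on an x86-64 target a def of the sub-register EAX would also
+// close an open range whose location is RAX, because MCRegAliasIterator
+// enumerates every register overlapping the defined one (the register names
+// here are purely illustrative).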
+
+/// Terminate all open ranges at the end of the current basic block.
+bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs,
+ const VarLocMap &VarLocIDs) {
+ bool Changed = false;
+ const MachineBasicBlock *CurMBB = MI.getParent();
+ if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
+ return false;
+
+ if (OpenRanges.empty())
+ return false;
+
+ DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) {
+ // Copy OpenRanges to OutLocs, if not already present.
+ dbgs() << "Add to OutLocs: "; VarLocIDs[ID].dump();
+ });
+ VarLocSet &VLS = OutLocs[CurMBB];
+ Changed = VLS |= OpenRanges.getVarLocs();
+ OpenRanges.clear();
+ return Changed;
+}
+
+/// This routine creates OpenRanges and OutLocs.
+bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs) {
+ bool Changed = false;
+ transferDebugValue(MI, OpenRanges, VarLocIDs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
+ return Changed;
+}
+
+/// This routine joins the analysis results from all incoming edges of @MBB by
+/// inserting a new DBG_VALUE instruction at the start of @MBB whenever the same
+/// source variable resides in the same location in all predecessors of @MBB.
+bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
+ VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) {
+ DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
+ bool Changed = false;
+
+ VarLocSet InLocsT; // Temporary incoming locations.
+
+ // For all predecessors of this MBB, find the set of VarLocs that
+ // can be joined.
+ for (auto p : MBB.predecessors()) {
+ auto OL = OutLocs.find(p);
+ // The join is empty if any predecessor has no OutLocs yet; bail out.
+ if (OL == OutLocs.end())
+ return false;
+
+ // Just copy over the Out locs to incoming locs for the first predecessor.
+ if (p == *MBB.pred_begin()) {
+ InLocsT = OL->second;
+ continue;
+ }
+ // Join with this predecessor.
+ InLocsT &= OL->second;
+ }
+
+ if (InLocsT.empty())
+ return false;
+
+ VarLocSet &ILS = InLocs[&MBB];
+
+ // Insert DBG_VALUE instructions, if not already inserted.
+ VarLocSet Diff = InLocsT;
+ Diff.intersectWithComplement(ILS);
+ for (auto ID : Diff) {
+ // This VarLoc is not found in InLocs, i.e. it has not been inserted yet. So a
+ // new range is started for the variable from the beginning of the MBB by
+ // inserting a new DBG_VALUE. transfer() will end this range as appropriate.
+ const VarLoc &DiffIt = VarLocIDs[ID];
+ const MachineInstr *DMI = &DiffIt.MI;
+ MachineInstr *MI =
+ BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
+ DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ if (DMI->isIndirectDebugValue())
+ MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ ILS.set(ID);
+ ++NumInserted;
+ Changed = true;
+ }
+ return Changed;
+}
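+
+// For illustration only: the set algebra above boils down to a meet
+// (intersection) across predecessors followed by "what is agreed on but not
+// yet installed". A standalone sketch with std::bitset standing in for the
+// SparseBitVector-backed VarLocSet (PredA, PredB and Installed are made-up
+// names):
+//
+//   #include <bitset>
+//
+//   std::bitset<8> PredA("01101100"), PredB("01001110"); // preds' OutLocs
+//   std::bitset<8> Installed("00001000");   // IDs already recorded in ILS
+//   std::bitset<8> Meet = PredA & PredB;    // InLocsT &= OL->second
+//   std::bitset<8> ToInsert = Meet & ~Installed; // intersectWithComplement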
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "\nDebug Range Extension\n");
+
+ bool Changed = false;
+ bool OLChanged = false;
+ bool MBBJoined = false;
+
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Pending;
+
+ // Initialize every mbb with OutLocs.
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+
+ DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization",
+ dbgs()));
+
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
+ // This is a standard "union of predecessor outs" dataflow problem.
+ // To solve it, we perform join() and transfer() using the two worklist method
+ // until the ranges converge.
+ // Ranges have converged when both worklists are empty.
+ while (!Worklist.empty() || !Pending.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice. We could avoid this with a custom priority queue, but this
+ // is probably not worth it.
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending;
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ Worklist.pop();
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs);
+
+ if (MBBJoined) {
+ MBBJoined = false;
+ Changed = true;
+ for (auto &MI : *MBB)
+ OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+
+ DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
+ "OutLocs after propagating", dbgs()));
+ DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs,
+ "InLocs after propagating", dbgs()));
+
+ if (OLChanged) {
+ OLChanged = false;
+ for (auto s : MBB->successors())
+ if (OnPending.insert(s).second) {
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+ }
+ Worklist.swap(Pending);
+    // At this point, Pending must be empty, since it was just swapped with
+    // the drained Worklist above.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+ DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
+ DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
+ return Changed;
+}
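+
+// For illustration only: ExtendRanges is an instance of the generic
+// two-worklist, reverse-post-order dataflow loop. A self-contained sketch of
+// just the scheduling skeleton (solveTwoWorklists, JoinAndTransfer and Succs
+// are made-up names; blocks are identified by their RPO number):
+//
+//   #include <functional>
+//   #include <queue>
+//   #include <set>
+//   #include <vector>
+//
+//   void solveTwoWorklists(
+//       unsigned NumBlocks,
+//       const std::function<bool(unsigned)> &JoinAndTransfer,
+//       const std::function<std::vector<unsigned>(unsigned)> &Succs) {
+//     std::priority_queue<unsigned, std::vector<unsigned>,
+//                         std::greater<unsigned>> Worklist, Pending;
+//     for (unsigned RPO = 0; RPO != NumBlocks; ++RPO)
+//       Worklist.push(RPO);                 // seed with every block
+//     while (!Worklist.empty() || !Pending.empty()) {
+//       std::set<unsigned> OnPending;       // don't queue a block twice
+//       while (!Worklist.empty()) {
+//         unsigned BB = Worklist.top();
+//         Worklist.pop();
+//         if (JoinAndTransfer(BB))          // out-set changed: revisit succs
+//           for (unsigned S : Succs(BB))
+//             if (OnPending.insert(S).second)
+//               Pending.push(S);
+//       }
+//       Worklist.swap(Pending);             // Pending is drained afterwards
+//     }
+//   }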
+
+bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ bool Changed = false;
+
+ Changed |= ExtendRanges(MF);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..966b4f1f4e4d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,1047 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "livedebug"
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+namespace {
+/// UserValueScopes - Keeps track of lexical scopes associated with a
+/// user value's source location.
+class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
+
+ /// dominates - Return true if current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
+ }
+};
+} // end anonymous namespace
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class LDVImpl;
+class UserValue {
+ const MDNode *Variable; ///< The debug info variable we are part of.
+ const MDNode *Expression; ///< Any complex address expression.
+ unsigned offset; ///< Byte offset into variable.
+ bool IsIndirect; ///< true if this is a register-indirect+offset value.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs
+ /// is live. Returns true if any changes were made.
+ bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i,
+ DebugLoc L, LocMap::Allocator &alloc)
+ : Variable(var), Expression(expr), offset(o), IsIndirect(i),
+ dl(std::move(L)), leader(this), next(nullptr), locInts(alloc) {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA,
+ unsigned Offset, bool indirect) const {
+ return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA &&
+ Offset == offset && indirect == IsIndirect;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next) {
+ End->leader = L1;
+ End = End->next;
+ }
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg()) {
+ if (LocMO.getReg() == 0)
+ return ~0u;
+      // For register locations we don't care about use/def and other flags.
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ locations[i].getReg() == LocMO.getReg() &&
+ locations[i].getSubReg() == LocMO.getSubReg())
+ return i;
+ } else
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ // Don't store def operands.
+ if (locations.back().isReg())
+ locations.back().setIsUse();
+ return locations.size() - 1;
+ }
+
+ /// mapVirtRegs - Ensure that all virtual register locations are mapped.
+ void mapVirtRegs(LDVImpl *LDV);
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ else
+ // A later DBG_VALUE at the same SlotIndex overrides the old location.
+ I.setValue(getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// End points where VNI is no longer live are added to Kills.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LR Restrict liveness to where LR has the value VNI. May be null.
+ /// @param VNI When LR is not null, this is the value to restrict to.
+ /// @param Kills Append end points of VNI's live range to Kills.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveRange *LR, const VNInfo *VNI,
+ SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+  /// addDefsFromCopies - The value in LI/LocNo may be copied to other
+ /// registers. Determine if any of the copies are available at the kill
+ /// points, and add defs if possible.
+ /// @param LI Scan for copies of the value in LI->reg.
+ /// @param LocNo Location number of LI->reg.
+ /// @param Kills Points where the range of LocNo could be extended.
+ /// @param NewDefs Append (Idx, LocNo) of inserted defs here.
+ void addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS);
+
+ /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
+ /// live. Returns true if any changes were made.
+  bool splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// getDebugLoc - Return DebugLoc of this UserValue.
+  DebugLoc getDebugLoc() { return dl; }
+ void print(raw_ostream &, const TargetRegisterInfo *);
+};
+} // namespace
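+
+// For illustration only: the leader/next fields above implement a union-find
+// over UserValues, with path compression in getLeader() and list splicing in
+// merge(). The core lookup, with a made-up Node type:
+//
+//   struct Node {
+//     Node *Leader = this;  // equivalence-class representative
+//     Node *Next = nullptr; // chain of class members
+//   };
+//
+//   Node *getLeader(Node *N) {
+//     Node *L = N->Leader;
+//     while (L != L->Leader)
+//       L = L->Leader;      // walk up to the representative
+//     return N->Leader = L; // path compression for the next query
+//   }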
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ LexicalScopes LS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// Whether emitDebugValues is called.
+ bool EmitDone;
+ /// Whether the machine function is modified during the pass.
+ bool ModifiedMF;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<std::unique_ptr<UserValue>, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
+ unsigned Offset, bool IsIndirect, const DebugLoc &DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+  /// @param Idx   Last valid SlotIndex before instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps)
+ : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ MF = nullptr;
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ // Make sure we call emitDebugValues if the machine function was modified.
+ assert((!ModifiedMF || EmitDone) &&
+ "Dbg values are not emitted in LDV");
+ EmitDone = false;
+ ModifiedMF = false;
+ LS.reset();
+ }
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// splitRegister - Replace all references to OldReg with NewRegs.
+ void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
+
+ /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
+ const LLVMContext &Ctx) {
+ if (!DL)
+ return;
+
+ auto *Scope = cast<DIScope>(DL.getScope());
+ // Omit the directory, because it's likely to be long and uninteresting.
+ CommentOS << Scope->getFilename();
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+
+ DebugLoc InlinedAtDL = DL.getInlinedAt();
+ if (!InlinedAtDL)
+ return;
+
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, CommentOS, Ctx);
+ CommentOS << " ]";
+}
+
+static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
+ const DILocation *DL) {
+ const LLVMContext &Ctx = V->getContext();
+ StringRef Res = V->getName();
+ if (!Res.empty())
+ OS << Res << "," << V->getLine();
+ if (auto *InlinedAt = DL->getInlinedAt()) {
+ if (DebugLoc InlinedAtDL = InlinedAt) {
+ OS << " @[";
+ printDebugLoc(InlinedAtDL, OS, Ctx);
+ OS << "]";
+ }
+ }
+}
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ auto *DV = cast<DILocalVariable>(Variable);
+ OS << "!\"";
+ printExtendedName(OS, DV, dl);
+
+ OS << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i) {
+ OS << " Loc" << i << '=';
+ locations[i].print(OS, TRI);
+ }
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
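+
+// For illustration only: the renumbering above is the usual erase-and-shift
+// pattern for index-based references. A sketch with std:: containers
+// (eraseLocation, Locations and Refs are made-up names):
+//
+//   #include <vector>
+//
+//   void eraseLocation(std::vector<int> &Locations,
+//                      std::vector<unsigned> &Refs,
+//                      unsigned KeepLoc, unsigned EraseLoc) {
+//     Locations.erase(Locations.begin() + EraseLoc);
+//     for (unsigned &R : Refs) {
+//       if (R == EraseLoc)
+//         R = KeepLoc;      // redirect to the surviving duplicate
+//       else if (R > EraseLoc)
+//         --R;              // indices above the erased slot shift down
+//     }
+//   }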
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (locations[i].isReg() &&
+ TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
+ unsigned Offset, bool IsIndirect,
+ const DebugLoc &DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect))
+ return UV;
+ }
+
+ userValues.push_back(
+ make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator));
+ UserValue *UV = userValues.back().get();
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return nullptr;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI.getNumOperands() != 4 ||
+ !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) ||
+ !MI.getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ bool IsIndirect = MI.isIndirectDebugValue();
+ unsigned Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
+ const MDNode *Var = MI.getDebugVariable();
+ const MDNode *Expr = MI.getDebugExpression();
+ UserValue *UV = getUserValue(Var, Expr, Offset, IsIndirect, MI.getDebugLoc());
+ UV->addDef(Idx, MI.getOperand(0));
+ return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx =
+ MBBI == MBB->begin()
+ ? LIS->getMBBStartIdx(MBB)
+ : LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(*MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+/// We only propagate DBG_VALUEs locally here. LiveDebugValues performs a
+/// data-flow analysis to propagate them beyond basic block boundaries.
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
+ const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
+ LiveIntervals &LIS, MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SlotIndex Start = Idx;
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LR && VNI) {
+ LiveInterval::Segment *Segment = LR->getSegmentContaining(Start);
+ if (!Segment || Segment->valno != VNI) {
+ if (Kills)
+ Kills->push_back(Start);
+ return;
+ }
+ if (Segment->end < Stop) {
+ Stop = Segment->end;
+ ToEnd = false;
+ }
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ return;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop) {
+ Stop = I.start();
+ ToEnd = false;
+ }
+ // Limited by VNI's live range.
+ else if (!ToEnd && Kills)
+ Kills->push_back(Stop);
+
+ if (Start < Stop)
+ I.insert(Start, Stop, LocNo);
+}
+
+void
+UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
+ const SmallVectorImpl<SlotIndex> &Kills,
+ SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs,
+ MachineRegisterInfo &MRI, LiveIntervals &LIS) {
+ if (Kills.empty())
+ return;
+ // Don't track copies from physregs, there are too many uses.
+ if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ return;
+
+ // Collect all the (vreg, valno) pairs that are copies of LI.
+ SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
+ for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) {
+ MachineInstr *MI = MO.getParent();
+ // Copies of the full value.
+ if (MO.getSubReg() || !MI->isCopy())
+ continue;
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // Don't follow copies to physregs. These are usually setting up call
+ // arguments, and the argument registers are always call clobbered. We are
+ // better off in the source register which could be a callee-saved register,
+ // or it could be spilled.
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ continue;
+
+ // Is LocNo extended to reach this copy? If not, another def may be blocking
+ // it, or we are looking at a wrong value of LI.
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
+ if (!I.valid() || I.value() != LocNo)
+ continue;
+
+ if (!LIS.hasInterval(DstReg))
+ continue;
+ LiveInterval *DstLI = &LIS.getInterval(DstReg);
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
+ CopyValues.push_back(std::make_pair(DstLI, DstVNI));
+ }
+
+ if (CopyValues.empty())
+ return;
+
+ DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n');
+
+ // Try to add defs of the copied values for each kill point.
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+ SlotIndex Idx = Kills[i];
+ for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) {
+ LiveInterval *DstLI = CopyValues[j].first;
+ const VNInfo *DstVNI = CopyValues[j].second;
+ if (DstLI->getVNInfoAt(Idx) != DstVNI)
+ continue;
+ // Check that there isn't already a def at Idx
+ LocMap::iterator I = locInts.find(Idx);
+ if (I.valid() && I.start() <= Idx)
+ continue;
+ DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #"
+ << DstVNI->id << " in " << *DstLI << '\n');
+ MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
+ assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
+ unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
+ I.insert(Idx, Idx.getNextSlot(), LocNo);
+ NewDefs.push_back(std::make_pair(Idx, LocNo));
+ break;
+ }
+ }
+}
+
+void
+UserValue::computeIntervals(MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ LiveIntervals &LIS,
+ MachineDominatorTree &MDT,
+ UserValueScopes &UVS) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ // Extend all defs, and possibly add new ones along the way.
+ for (unsigned i = 0; i != Defs.size(); ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ if (!Loc.isReg()) {
+ extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS);
+ continue;
+ }
+
+ // Register locations are constrained to where the register value is live.
+ if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
+ LiveInterval *LI = nullptr;
+ const VNInfo *VNI = nullptr;
+ if (LIS.hasInterval(Loc.getReg())) {
+ LI = &LIS.getInterval(Loc.getReg());
+ VNI = LI->getVNInfoAt(Idx);
+ }
+ SmallVector<SlotIndex, 16> Kills;
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
+ if (LI)
+ addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
+ continue;
+ }
+
+ // For physregs, use the live range of the first regunit as a guide.
+ unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI);
+ LiveRange *LR = &LIS.getRegUnit(Unit);
+ const VNInfo *VNI = LR->getVNInfoAt(Idx);
+ // Don't track copies from physregs, it is too expensive.
+ extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
+ userValues[i]->mapVirtRegs(this);
+ }
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ clear();
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getSubtarget().getRegisterInfo();
+ LS.initialize(mf);
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << mf.getName() << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ ModifiedMF = Changed;
+ return Changed;
+}
+
+static void removeDebugValues(MachineFunction &mf) {
+ for (MachineBasicBlock &MBB : mf) {
+ for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ MBBI = MBB.erase(MBBI);
+ }
+ }
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!mf.getFunction()->getSubprogram()) {
+ removeDebugValues(mf);
+ return false;
+ }
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+bool
+UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
+ LiveIntervals& LIS) {
+ DEBUG({
+ dbgs() << "Splitting Loc" << OldLocNo << '\t';
+ print(dbgs(), nullptr);
+ });
+ bool DidChange = false;
+ LocMap::iterator LocMapI;
+ LocMapI.setMap(locInts);
+ for (unsigned i = 0; i != NewRegs.size(); ++i) {
+ LiveInterval *LI = &LIS.getInterval(NewRegs[i]);
+ if (LI->empty())
+ continue;
+
+ // Don't allocate the new LocNo until it is needed.
+ unsigned NewLocNo = ~0u;
+
+ // Iterate over the overlaps between locInts and LI.
+ LocMapI.find(LI->beginIndex());
+ if (!LocMapI.valid())
+ continue;
+ LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start());
+ LiveInterval::iterator LIE = LI->end();
+ while (LocMapI.valid() && LII != LIE) {
+ // At this point, we know that LocMapI.stop() > LII->start.
+ LII = LI->advanceTo(LII, LocMapI.start());
+ if (LII == LIE)
+ break;
+
+ // Now LII->end > LocMapI.start(). Do we have an overlap?
+ if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) {
+ // Overlapping correct location. Allocate NewLocNo now.
+ if (NewLocNo == ~0u) {
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MO.setSubReg(locations[OldLocNo].getSubReg());
+ NewLocNo = getLocationNo(MO);
+ DidChange = true;
+ }
+
+ SlotIndex LStart = LocMapI.start();
+ SlotIndex LStop = LocMapI.stop();
+
+ // Trim LocMapI down to the LII overlap.
+ if (LStart < LII->start)
+ LocMapI.setStartUnchecked(LII->start);
+ if (LStop > LII->end)
+ LocMapI.setStopUnchecked(LII->end);
+
+ // Change the value in the overlap. This may trigger coalescing.
+ LocMapI.setValue(NewLocNo);
+
+ // Re-insert any removed OldLocNo ranges.
+ if (LStart < LocMapI.start()) {
+ LocMapI.insert(LStart, LocMapI.start(), OldLocNo);
+ ++LocMapI;
+ assert(LocMapI.valid() && "Unexpected coalescing");
+ }
+ if (LStop > LocMapI.stop()) {
+ ++LocMapI;
+ LocMapI.insert(LII->end, LStop, OldLocNo);
+ --LocMapI;
+ }
+ }
+
+ // Advance to the next overlap.
+ if (LII->end < LocMapI.stop()) {
+ if (++LII == LIE)
+ break;
+ LocMapI.advanceTo(LII->start);
+ } else {
+ ++LocMapI;
+ if (!LocMapI.valid())
+ break;
+ LII = LI->advanceTo(LII, LocMapI.start());
+ }
+ }
+ }
+
+ // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
+ locations.erase(locations.begin() + OldLocNo);
+ LocMapI.goToBegin();
+ while (LocMapI.valid()) {
+ unsigned v = LocMapI.value();
+ if (v == OldLocNo) {
+ DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
+ << LocMapI.stop() << ")\n");
+ LocMapI.erase();
+ } else {
+ if (v > OldLocNo)
+ LocMapI.setValueUnchecked(v-1);
+ ++LocMapI;
+ }
+ }
+
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);});
+ return DidChange;
+}
+
+bool
+UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS) {
+ bool DidChange = false;
+ // Split locations referring to OldReg. Iterate backwards so splitLocation can
+ // safely erase unused locations.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ const MachineOperand *Loc = &locations[LocNo];
+ if (!Loc->isReg() || Loc->getReg() != OldReg)
+ continue;
+ DidChange |= splitLocation(LocNo, NewRegs, LIS);
+ }
+ return DidChange;
+}
+
+void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
+ bool DidChange = false;
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+
+ if (!DidChange)
+ return;
+
+ // Map all of the new virtual registers.
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i], UV);
+}
+
+void LiveDebugVariables::
+splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ // This can create a %noreg operand in rare cases when the sub-register
+ // index is no longer available. That means the user value is in a
+ // non-existent sub-register, and %noreg is exactly what we want.
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ std::next(MachineBasicBlock::iterator(MI));
+}
+
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+ ++NumInsertedDebugValues;
+
+ assert(cast<DILocalVariable>(Variable)
+ ->isValidLocationForIntrinsic(getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ if (Loc.isReg())
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
+ IsIndirect, Loc.getReg(), offset, Variable, Expression);
+ else
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc)
+ .addImm(offset)
+ .addMetadata(Variable)
+ .addMetadata(Expression);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+    while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(&*MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(&*MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ }
+}
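+
+// For illustration only: the loop above re-emits one DBG_VALUE per basic
+// block that an interval crosses. The same walking pattern over plain
+// indices (emitPerBlock, BlockEndAfter and EmitAt are made-up names; the
+// sketch assumes every index lies inside some block):
+//
+//   #include <functional>
+//
+//   void emitPerBlock(unsigned Start, unsigned Stop,
+//                     const std::function<unsigned(unsigned)> &BlockEndAfter,
+//                     const std::function<void(unsigned)> &EmitAt) {
+//     unsigned BlockEnd = BlockEndAfter(Start);
+//     EmitAt(Start);              // value becomes available here
+//     while (Stop > BlockEnd) {   // interval spills into the next block
+//       Start = BlockEnd;         // re-emit at the next block's entry
+//       BlockEnd = BlockEndAfter(Start);
+//       EmitAt(Start);
+//     }
+//   }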
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ if (!MF)
+ return;
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ DEBUG(userValues[i]->print(dbgs(), TRI));
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+ EmitDone = true;
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+bool LiveDebugVariables::doInitialization(Module &M) {
+ return Pass::doInitialization(M);
+}
+
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..afe87a52544d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,75 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DebugInfo.h"
+
+namespace llvm {
+
+template <typename T> class ArrayRef;
+class LiveInterval;
+class LiveIntervals;
+class VirtRegMap;
+
+class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables() override;
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// splitRegister - Move any user variables in OldReg to the live ranges in
+ /// NewRegs where they are live. Mark the values as unavailable where no new
+ /// register is live.
+ void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
+ LiveIntervals &LIS);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ bool runOnMachineFunction(MachineFunction &) override;
+ void releaseMemory() override;
+ void getAnalysisUsage(AnalysisUsage &) const override;
+ bool doInitialization(Module &) override;
+
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 000000000000..93c5ca785ac9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,1250 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of the machine instructions, an interval [i, j) is said to be a
+// live range for register v if there is no instruction with number j' >= j
+// such that v is live at j' and there is no instruction with number i' < i such
+// that v is live at i'. In this implementation ranges can have holes,
+// i.e. a range might look like [1,20), [50,65), [1000,1001). Each
+// individual segment is represented as an instance of LiveRange::Segment,
+// and the whole range is represented as an instance of LiveRange.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include "LiveRangeUtils.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+//===----------------------------------------------------------------------===//
+// Implementation of various methods necessary for calculation of live ranges.
+// The implementation of the methods abstracts from the concrete type of the
+// segment collection.
+//
+// Implementation of the class follows the Template design pattern. The base
+// class contains generic algorithms that call collection-specific methods,
+// which are provided in concrete subclasses. In order to avoid virtual calls
+// these methods are provided by means of C++ template instantiation.
+// The base class calls the methods of the subclass through method impl(),
+// which casts 'this' pointer to the type of the subclass.
+//
+//===----------------------------------------------------------------------===//
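+
+// For illustration only, this is the curiously recurring template pattern
+// (CRTP); a minimal self-contained sketch (CalcBase, VectorImpl and step are
+// made-up names):
+//
+//   template <typename ImplT> struct CalcBase {
+//     int twiceStep() { return 2 * impl().step(); } // generic algorithm
+//   private:
+//     ImplT &impl() { return *static_cast<ImplT *>(this); }
+//   };
+//
+//   struct VectorImpl : CalcBase<VectorImpl> {
+//     int step() { return 21; }                     // subclass-specific part
+//   };
+//
+//   // VectorImpl().twiceStep() == 42, resolved statically, no virtual calls.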
+
+template <typename ImplT, typename IteratorT, typename CollectionT>
+class CalcLiveRangeUtilBase {
+protected:
+ LiveRange *LR;
+
+protected:
+ CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {}
+
+public:
+ typedef LiveRange::Segment Segment;
+ typedef IteratorT iterator;
+
+ VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) {
+ assert(!Def.isDead() && "Cannot define a value at the dead slot");
+
+ iterator I = impl().find(Def);
+ if (I == segments().end()) {
+ VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ Segment *S = segmentAt(I);
+ if (SlotIndex::isSameInstr(Def, S->start)) {
+ assert(S->valno->def == S->start && "Inconsistent existing value def");
+
+ // It is possible to have both normal and early-clobber defs of the same
+ // register on an instruction. It doesn't make a lot of sense, but it is
+ // possible to specify in inline assembly.
+ //
+ // Just convert everything to early-clobber.
+ Def = std::min(Def, S->start);
+ if (Def != S->start)
+ S->start = S->valno->def = Def;
+ return S->valno;
+ }
+ assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def");
+ VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI));
+ return VNI;
+ }
+
+ VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return nullptr;
+ iterator I =
+ impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
+ if (I == segments().begin())
+ return nullptr;
+ --I;
+ if (I->end <= StartIdx)
+ return nullptr;
+ if (I->end < Use)
+ extendSegmentEndTo(I, Use);
+ return I->valno;
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to end at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap
+ /// with. The iterator is not invalidated.
+ void extendSegmentEndTo(iterator I, SlotIndex NewEnd) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = std::next(I);
+ for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo)
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+
+ // If NewEnd was in the middle of a segment, make sure to get its endpoint.
+ S->end = std::max(NewEnd, std::prev(MergeTo)->end);
+
+ // If the newly formed segment now touches the segment after it and if they
+ // have the same value number, merge the two segments into one segment.
+ if (MergeTo != segments().end() && MergeTo->start <= I->end &&
+ MergeTo->valno == ValNo) {
+ S->end = MergeTo->end;
+ ++MergeTo;
+ }
+
+ // Erase any dead segments.
+ segments().erase(std::next(I), MergeTo);
+ }
+
+ /// This method is used when we want to extend the segment specified
+ /// by I to start at the specified endpoint. To do this, we should
+ /// merge and eliminate all segments that this will overlap with.
+ iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) {
+ assert(I != segments().end() && "Not a valid segment!");
+ Segment *S = segmentAt(I);
+ VNInfo *ValNo = I->valno;
+
+ // Search for the first segment that we can't merge with.
+ iterator MergeTo = I;
+ do {
+ if (MergeTo == segments().begin()) {
+ S->start = NewStart;
+ segments().erase(MergeTo, I);
+ return I;
+ }
+ assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+ --MergeTo;
+ } while (NewStart <= MergeTo->start);
+
+ // If we start in the middle of another segment, just delete a range and
+ // extend that segment.
+ if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+ segmentAt(MergeTo)->end = S->end;
+ } else {
+ // Otherwise, extend the segment right after.
+ ++MergeTo;
+ Segment *MergeToSeg = segmentAt(MergeTo);
+ MergeToSeg->start = NewStart;
+ MergeToSeg->end = S->end;
+ }
+
+ segments().erase(std::next(MergeTo), std::next(I));
+ return MergeTo;
+ }
+
+ iterator addSegment(Segment S) {
+ SlotIndex Start = S.start, End = S.end;
+ iterator I = impl().findInsertPos(S);
+
+ // If the inserted segment starts in the middle or right at the end of
+ // another segment, just extend that segment to contain the segment of S.
+ if (I != segments().begin()) {
+ iterator B = std::prev(I);
+ if (S.valno == B->valno) {
+ if (B->start <= Start && B->end >= Start) {
+ extendSegmentEndTo(B, End);
+ return B;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(B->end <= Start &&
+ "Cannot overlap two segments with differing ValID's"
+ " (did you def the same reg twice in a MachineInstr?)");
+ }
+ }
+
+ // Otherwise, if this segment ends in the middle of, or right next
+ // to, another segment, merge it into that segment.
+ if (I != segments().end()) {
+ if (S.valno == I->valno) {
+ if (I->start <= End) {
+ I = extendSegmentStartTo(I, Start);
+
+ // If S is a complete superset of a segment, we may need to grow its
+ // endpoint as well.
+ if (End > I->end)
+ extendSegmentEndTo(I, End);
+ return I;
+ }
+ } else {
+ // Check to make sure that we are not overlapping two live segments with
+ // different valno's.
+ assert(I->start >= End &&
+ "Cannot overlap two segments with differing ValID's");
+ }
+ }
+
+ // Otherwise, this is just a new segment that doesn't interact with
+ // anything.
+ // Insert it.
+ return segments().insert(I, S);
+ }
+
+private:
+ ImplT &impl() { return *static_cast<ImplT *>(this); }
+
+ CollectionT &segments() { return impl().segmentsColl(); }
+
+ Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment vector.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilVector;
+typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator,
+ LiveRange::Segments> CalcLiveRangeUtilVectorBase;
+
+class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase {
+public:
+ CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilVectorBase;
+
+ LiveRange::Segments &segmentsColl() { return LR->segments; }
+
+ void insertAtEnd(const Segment &S) { LR->segments.push_back(S); }
+
+ iterator find(SlotIndex Pos) { return LR->find(Pos); }
+
+ iterator findInsertPos(Segment S) {
+ return std::upper_bound(LR->begin(), LR->end(), S.start);
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Instantiation of the methods for calculation of live ranges
+// based on a segment set.
+//===----------------------------------------------------------------------===//
+
+class CalcLiveRangeUtilSet;
+typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet,
+ LiveRange::SegmentSet::iterator,
+ LiveRange::SegmentSet> CalcLiveRangeUtilSetBase;
+
+class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase {
+public:
+ CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {}
+
+private:
+ friend CalcLiveRangeUtilSetBase;
+
+ LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; }
+
+ void insertAtEnd(const Segment &S) {
+ LR->segmentSet->insert(LR->segmentSet->end(), S);
+ }
+
+ iterator find(SlotIndex Pos) {
+ iterator I =
+ LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr));
+ if (I == LR->segmentSet->begin())
+ return I;
+ iterator PrevI = std::prev(I);
+ if (Pos < (*PrevI).end)
+ return PrevI;
+ return I;
+ }
+
+ iterator findInsertPos(Segment S) {
+ iterator I = LR->segmentSet->upper_bound(S);
+ if (I != LR->segmentSet->end() && !(S.start < *I))
+ ++I;
+ return I;
+ }
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// LiveRange methods
+//===----------------------------------------------------------------------===//
+
+LiveRange::iterator LiveRange::find(SlotIndex Pos) {
+ // This algorithm is basically std::upper_bound.
+ // Unfortunately, std::upper_bound cannot be used with mixed types until we
+ // adopt C++0x. Many libraries can do it, but not all.
+ if (empty() || Pos >= endIndex())
+ return end();
+ iterator I = begin();
+ size_t Len = size();
+ do {
+ size_t Mid = Len >> 1;
+ if (Pos < I[Mid].end) {
+ Len = Mid;
+ } else {
+ I += Mid + 1;
+ Len -= Mid + 1;
+ }
+ } while (Len);
+ return I;
+}
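+
+// For illustration only: with a heterogeneous comparator, the loop above is
+// equivalent to std::upper_bound over the segment end points (Seg and findSeg
+// are made-up names; segments are sorted and disjoint):
+//
+//   #include <algorithm>
+//   #include <vector>
+//
+//   struct Seg { unsigned Start, End; };        // half-open [Start, End)
+//
+//   std::vector<Seg>::const_iterator findSeg(const std::vector<Seg> &Segs,
+//                                            unsigned Pos) {
+//     // First segment whose End is strictly greater than Pos.
+//     return std::upper_bound(
+//         Segs.begin(), Segs.end(), Pos,
+//         [](unsigned P, const Seg &S) { return P < S.End; });
+//   }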
+
+VNInfo *LiveRange::createDeadDef(SlotIndex Def,
+ VNInfo::Allocator &VNInfoAllocator) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator);
+}
+
+// overlaps - Return true if the intersection of the two live ranges is
+// not empty.
+//
+// An example for overlaps():
+//
+// 0: A = ...
+// 4: B = ...
+// 8: C = A + B ;; last use of A
+//
+// The live ranges should look like:
+//
+// A = [3, 11)
+// B = [7, x)
+// C = [11, y)
+//
+// A->overlaps(C) should return false since we want to be able to join
+// A and C.
+//
+bool LiveRange::overlapsFrom(const LiveRange& other,
+ const_iterator StartPos) const {
+ assert(!empty() && "empty range");
+ const_iterator i = begin();
+ const_iterator ie = end();
+ const_iterator j = StartPos;
+ const_iterator je = other.end();
+
+ assert((StartPos->start <= i->start || StartPos == other.begin()) &&
+ StartPos != other.end() && "Bogus start position hint!");
+
+ if (i->start < j->start) {
+ i = std::upper_bound(i, ie, j->start);
+ if (i != begin()) --i;
+ } else if (j->start < i->start) {
+ ++StartPos;
+ if (StartPos != other.end() && StartPos->start <= i->start) {
+ assert(StartPos < other.end() && i < end());
+ j = std::upper_bound(j, je, i->start);
+ if (j != other.begin()) --j;
+ }
+ } else {
+ return true;
+ }
+
+ if (j == je) return false;
+
+ while (i != ie) {
+ if (i->start > j->start) {
+ std::swap(i, j);
+ std::swap(ie, je);
+ }
+
+ if (i->end > j->start)
+ return true;
+ ++i;
+ }
+
+ return false;
+}
+
+bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
+ const SlotIndexes &Indexes) const {
+ assert(!empty() && "empty range");
+ if (Other.empty())
+ return false;
+
+ // Use binary searches to find initial positions.
+ const_iterator I = find(Other.beginIndex());
+ const_iterator IE = end();
+ if (I == IE)
+ return false;
+ const_iterator J = Other.find(I->start);
+ const_iterator JE = Other.end();
+ if (J == JE)
+ return false;
+
+ for (;;) {
+ // J has just been advanced to satisfy:
+ assert(J->end >= I->start);
+ // Check for an overlap.
+ if (J->start < I->end) {
+ // I and J are overlapping. Find the later start.
+ SlotIndex Def = std::max(I->start, J->start);
+ // Allow the overlap if Def is a coalescable copy.
+ if (Def.isBlock() ||
+ !CP.isCoalescable(Indexes.getInstructionFromIndex(Def)))
+ return true;
+ }
+ // Advance the iterator that ends first to check for more overlaps.
+ if (J->end > I->end) {
+ std::swap(I, J);
+ std::swap(IE, JE);
+ }
+ // Advance J until J->end >= I->start.
+ do
+ if (++J == JE)
+ return false;
+ while (J->end < I->start);
+ }
+}
+
+/// overlaps - Return true if the live range overlaps an interval specified
+/// by [Start, End).
+bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const {
+ assert(Start < End && "Invalid range");
+ const_iterator I = std::lower_bound(begin(), end(), End);
+ return I != begin() && (--I)->end > Start;
+}
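+
+// For illustration only: the same overlap test against a sorted, disjoint set
+// of half-open segments, spelled out with std:: types (Seg and overlapsRange
+// are made-up names):
+//
+//   #include <algorithm>
+//   #include <iterator>
+//   #include <vector>
+//
+//   struct Seg { unsigned Start, End; };
+//
+//   bool overlapsRange(const std::vector<Seg> &Segs,
+//                      unsigned Start, unsigned End) {
+//     // First segment starting at or after End; only its predecessor can
+//     // overlap [Start, End).
+//     auto I = std::lower_bound(
+//         Segs.begin(), Segs.end(), End,
+//         [](const Seg &S, unsigned V) { return S.Start < V; });
+//     return I != Segs.begin() && std::prev(I)->End > Start;
+//   }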
+
+bool LiveRange::covers(const LiveRange &Other) const {
+ if (empty())
+ return Other.empty();
+
+ const_iterator I = begin();
+ for (const Segment &O : Other.segments) {
+ I = advanceTo(I, O.start);
+ if (I == end() || I->start > O.start)
+ return false;
+
+ // Check adjacent live segments and see if we can get behind O.end.
+ while (I->end < O.end) {
+ const_iterator Last = I;
+ // Get next segment and abort if it was not adjacent.
+ ++I;
+ if (I == end() || Last->end != I->start)
+ return false;
+ }
+ }
+ return true;
+}
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as ~1U so
+/// it can be nuked later.
+void LiveRange::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->markUnused();
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveRange::RenumberValues() {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ valnos.clear();
+ for (const Segment &S : segments) {
+ VNInfo *VNI = S.valno;
+ if (!Seen.insert(VNI).second)
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live segment");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ }
+}
+
+void LiveRange::addSegmentToSet(Segment S) {
+ CalcLiveRangeUtilSet(this).addSegment(S);
+}
+
+LiveRange::iterator LiveRange::addSegment(Segment S) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr) {
+ addSegmentToSet(S);
+ return end();
+ }
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).addSegment(S);
+}
+
+void LiveRange::append(const Segment S) {
+ // Check that the segment belongs to the back of the list.
+ assert(segments.empty() || segments.back().end <= S.start);
+ segments.push_back(S);
+}
+
+/// extendInBlock - If this range is live before Kill in the basic
+/// block that starts at StartIdx, extend it to be live up to Kill and return
+/// the value. If there is no live range before Kill, return NULL.
+VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill);
+}
+
+/// Remove the specified segment from this range. Note that the span
+/// [Start, End) must lie within a single Segment in its entirety.
+void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
+ bool RemoveDeadValNo) {
+ // Find the Segment containing this span.
+ iterator I = find(Start);
+ assert(I != end() && "Segment is not in range!");
+ assert(I->containsInterval(Start, End)
+ && "Segment is not entirely in range!");
+
+ // If the span we are removing is at the start of the Segment, adjust it.
+ VNInfo *ValNo = I->valno;
+ if (I->start == Start) {
+ if (I->end == End) {
+ if (RemoveDeadValNo) {
+ // Check if val# is dead.
+ bool isDead = true;
+ for (const_iterator II = begin(), EE = end(); II != EE; ++II)
+ if (II != I && II->valno == ValNo) {
+ isDead = false;
+ break;
+ }
+ if (isDead) {
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+ }
+ }
+
+ segments.erase(I); // Removed the whole Segment.
+ } else
+ I->start = End;
+ return;
+ }
+
+ // Otherwise if the span we are removing is at the end of the Segment,
+ // adjust the other way.
+ if (I->end == End) {
+ I->end = Start;
+ return;
+ }
+
+ // Otherwise, we are splitting the Segment into two pieces.
+ SlotIndex OldEnd = I->end;
+ I->end = Start; // Trim the old segment.
+
+ // Insert the new one.
+ segments.insert(std::next(I), Segment(End, OldEnd, ValNo));
+}
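+
+// Examples (illustrative): removing [4,8) from a segment [0,12) splits it
+// into [0,4) and [8,12); removing [0,4) just trims it to [4,12); removing
+// [0,12) erases the segment entirely (and, if requested, a now-dead value
+// number along with it).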
+
+/// removeValNo - Remove all the segments defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveRange::removeValNo(VNInfo *ValNo) {
+ if (empty()) return;
+ segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) {
+ return S.valno == ValNo;
+ }), end());
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
+}
+
+void LiveRange::join(LiveRange &Other,
+ const int *LHSValNoAssignments,
+ const int *RHSValNoAssignments,
+ SmallVectorImpl<VNInfo *> &NewVNInfo) {
+ verify();
+
+ // Determine if any of our values are mapped. This is uncommon, so we want
+ // to avoid the range scan if not.
+ bool MustMapCurValNos = false;
+ unsigned NumVals = getNumValNums();
+ unsigned NumNewVals = NewVNInfo.size();
+ for (unsigned i = 0; i != NumVals; ++i) {
+ unsigned LHSValID = LHSValNoAssignments[i];
+ if (i != LHSValID ||
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
+ MustMapCurValNos = true;
+ break;
+ }
+ }
+
+ // If we have to apply a mapping to our base range assignment, rewrite it now.
+ if (MustMapCurValNos && !empty()) {
+ // Map the first live range.
+
+ iterator OutIt = begin();
+ OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+ for (iterator I = std::next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ assert(nextValNo && "Huh?");
+
+ // If this live range has the same value # as its immediate predecessor,
+ // and if they are neighbors, remove one Segment. This happens when we
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
+ } else {
+ // Didn't merge. Move OutIt to the next segment.
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
+ OutIt->start = I->start;
+ OutIt->end = I->end;
+ }
+ }
+ }
+ // If we merge some segments, chop off the end.
+ ++OutIt;
+ segments.erase(OutIt, end());
+ }
+
+ // Rewrite Other values before changing the VNInfo ids.
+ // This can leave Other in an invalid state because we're not coalescing
+ // touching segments that now have identical values. That's OK since Other is
+ // not supposed to be valid after calling join().
+ for (Segment &S : Other.segments)
+ S.valno = NewVNInfo[RHSValNoAssignments[S.valno->id]];
+
+ // Update val# info. Renumber them and make sure they all belong to this
+ // LiveRange now. Also remove dead val#'s.
+ unsigned NumValNos = 0;
+ for (unsigned i = 0; i < NumNewVals; ++i) {
+ VNInfo *VNI = NewVNInfo[i];
+ if (VNI) {
+ if (NumValNos >= NumVals)
+ valnos.push_back(VNI);
+ else
+ valnos[NumValNos] = VNI;
+ VNI->id = NumValNos++; // Renumber val#.
+ }
+ }
+ if (NumNewVals < NumVals)
+ valnos.resize(NumNewVals); // shrinkify
+
+ // Okay, now insert the RHS live segments into the LHS.
+ LiveRangeUpdater Updater(this);
+ for (Segment &S : Other.segments)
+ Updater.add(S);
+}
+
+/// Merge all of the segments in RHS into this live range as the specified
+/// value number. The segments in RHS are allowed to overlap with segments in
+/// the current range, but only if the overlapping segments have the
+/// specified value number.
+void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const Segment &S : RHS.segments)
+ Updater.add(S.start, S.end, LHSValNo);
+}
+
+/// MergeValueInAsValue - Merge all of the live segments of a specific val#
+/// in RHS into this live range as the specified value number.
+/// The segments in RHS are allowed to overlap with segments in the
+/// current range; the value numbers of the overlapped segments are
+/// replaced with the specified value number.
+void LiveRange::MergeValueInAsValue(const LiveRange &RHS,
+ const VNInfo *RHSValNo,
+ VNInfo *LHSValNo) {
+ LiveRangeUpdater Updater(this);
+ for (const Segment &S : RHS.segments)
+ if (S.valno == RHSValNo)
+ Updater.add(S.start, S.end, LHSValNo);
+}
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// segments with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+ assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+ // This code actually merges the (numerically) larger value number into the
+ // smaller value number, which is likely to allow us to compactify the value
+ // space. The only thing we have to be careful of is to preserve the
+ // instruction that defines the result value.
+
+ // Make sure V2 is smaller than V1.
+ if (V1->id < V2->id) {
+ V1->copyFrom(*V2);
+ std::swap(V1, V2);
+ }
+
+ // Merge V1 segments into V2.
+ for (iterator I = begin(); I != end(); ) {
+ iterator S = I++;
+ if (S->valno != V1) continue; // Not a V1 Segment.
+
+ // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+ // range, extend it.
+ if (S != begin()) {
+ iterator Prev = S-1;
+ if (Prev->valno == V2 && Prev->end == S->start) {
+ Prev->end = S->end;
+
+ // Erase this live-range.
+ segments.erase(S);
+ I = Prev+1;
+ S = Prev;
+ }
+ }
+
+ // Okay, now we have a V1 or V2 live range that is maximally merged forward.
+ // Ensure that it is a V2 live-range.
+ S->valno = V2;
+
+ // If we can merge it into later V2 segments, do so now. We ignore any
+ // following V1 segments, as they will be merged in subsequent iterations
+ // of the loop.
+ if (I != end()) {
+ if (I->start == S->end && I->valno == V2) {
+ S->end = I->end;
+ segments.erase(I);
+ I = S+1;
+ }
+ }
+ }
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
+ return V2;
+}
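+
+// Example (illustrative): merging V1 into V2 in a range printed as
+// [0,4:1)[4,8:0), where value number 1 is V1 and 0 is V2, first rewrites
+// the V1 segment to use V2 and then coalesces the now-touching segments
+// into [0,8:0).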
+
+void LiveRange::flushSegmentSet() {
+ assert(segmentSet != nullptr && "segment set must have been created");
+ assert(
+ segments.empty() &&
+ "segment set can be used only initially before switching to the array");
+ segments.append(segmentSet->begin(), segmentSet->end());
+ segmentSet = nullptr;
+ verify();
+}
+
+bool LiveRange::isLiveAtIndexes(ArrayRef<SlotIndex> Slots) const {
+ ArrayRef<SlotIndex>::iterator SlotI = Slots.begin();
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // If there are no regmask slots, we have nothing to search.
+ if (SlotI == SlotE)
+ return false;
+
+ // Start our search at the first segment that ends after the first slot.
+ const_iterator SegmentI = find(*SlotI);
+ const_iterator SegmentE = end();
+
+ // If there are no segments that end after the first slot, we're done.
+ if (SegmentI == SegmentE)
+ return false;
+
+ // Look for each slot in the live range.
+ for ( ; SlotI != SlotE; ++SlotI) {
+ // Go to the next segment that ends after the current slot.
+ // The slot may be within a hole in the range.
+ SegmentI = advanceTo(SegmentI, *SlotI);
+ if (SegmentI == SegmentE)
+ return false;
+
+ // If this segment contains the slot, we're done.
+ if (SegmentI->contains(*SlotI))
+ return true;
+ // Otherwise, look for the next slot.
+ }
+
+ // We didn't find a segment containing any of the slots.
+ return false;
+}
+
+void LiveInterval::freeSubRange(SubRange *S) {
+ S->~SubRange();
+ // Memory was allocated with BumpPtr allocator and is not freed here.
+}
+
+void LiveInterval::removeEmptySubRanges() {
+ SubRange **NextPtr = &SubRanges;
+ SubRange *I = *NextPtr;
+ while (I != nullptr) {
+ if (!I->empty()) {
+ NextPtr = &I->Next;
+ I = *NextPtr;
+ continue;
+ }
+ // Skip empty subranges until we find the first nonempty one.
+ do {
+ SubRange *Next = I->Next;
+ freeSubRange(I);
+ I = Next;
+ } while (I != nullptr && I->empty());
+ *NextPtr = I;
+ }
+}
+
+void LiveInterval::clearSubRanges() {
+ for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) {
+ Next = I->Next;
+ freeSubRange(I);
+ }
+ SubRanges = nullptr;
+}
+
+unsigned LiveInterval::getSize() const {
+ unsigned Sum = 0;
+ for (const Segment &S : segments)
+ Sum += S.start.distance(S.end);
+ return Sum;
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
+ return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveRange::Segment::dump() const {
+ dbgs() << *this << '\n';
+}
+#endif
+
+void LiveRange::print(raw_ostream &OS) const {
+ if (empty())
+ OS << "EMPTY";
+ else {
+ for (const Segment &S : segments) {
+ OS << S;
+ assert(S.valno == getValNumInfo(S.valno->id) && "Bad VNInfo");
+ }
+ }
+
+ // Print value number info.
+ if (getNumValNums()) {
+ OS << " ";
+ unsigned vnum = 0;
+ for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
+ ++i, ++vnum) {
+ const VNInfo *vni = *i;
+ if (vnum) OS << ' ';
+ OS << vnum << '@';
+ if (vni->isUnused()) {
+ OS << 'x';
+ } else {
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phi";
+ }
+ }
+ }
+}
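+
+// For reference (an illustrative rendering, following the format produced
+// above): a range with two segments of one value defined at index 16r
+// might print as "[16r,48r:0)[80r,96r:0) 0@16r"; an unused value number
+// prints its def as 'x', and a PHI def carries a "-phi" suffix.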
+
+void LiveInterval::SubRange::print(raw_ostream &OS) const {
+ OS << " L" << PrintLaneMask(LaneMask) << ' '
+ << static_cast<const LiveRange&>(*this);
+}
+
+void LiveInterval::print(raw_ostream &OS) const {
+ OS << PrintReg(reg) << ' ';
+ super::print(OS);
+ // Print subranges
+ for (const SubRange &SR : subranges())
+ OS << SR;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveRange::dump() const {
+ dbgs() << *this << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveInterval::SubRange::dump() const {
+ dbgs() << *this << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveInterval::dump() const {
+ dbgs() << *this << '\n';
+}
+#endif
+
+#ifndef NDEBUG
+void LiveRange::verify() const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ assert(I->start.isValid());
+ assert(I->end.isValid());
+ assert(I->start < I->end);
+ assert(I->valno != nullptr);
+ assert(I->valno->id < valnos.size());
+ assert(I->valno == valnos[I->valno->id]);
+ if (std::next(I) != E) {
+ assert(I->end <= std::next(I)->start);
+ if (I->end == std::next(I)->start)
+ assert(I->valno != std::next(I)->valno);
+ }
+ }
+}
+
+void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
+ super::verify();
+
+ // Make sure SubRanges are fine and LaneMasks are disjoint.
+ LaneBitmask Mask = 0;
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+ for (const SubRange &SR : subranges()) {
+ // The subrange lane mask should be disjoint from all previous subrange masks.
+ assert((Mask & SR.LaneMask) == 0);
+ Mask |= SR.LaneMask;
+
+ // The subrange mask should be contained in the maximum lane mask for the vreg.
+ assert((Mask & ~MaxMask) == 0);
+ // empty subranges must be removed.
+ assert(!SR.empty());
+
+ SR.verify();
+ // The main live range should cover the subrange.
+ assert(covers(SR));
+ }
+}
+#endif
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeUpdater class
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeUpdater class always maintains these invariants:
+//
+// - When LastStart is invalid, Spills is empty and the iterators are invalid.
+// This is the initial state, and the state created by flush().
+// In this state, isDirty() returns false.
+//
+// Otherwise, segments are kept in three separate areas:
+//
+// 1. [begin; WriteI) at the front of LR.
+// 2. [ReadI; end) at the back of LR.
+// 3. Spills.
+//
+// - LR.begin() <= WriteI <= ReadI <= LR.end().
+// - Segments in all three areas are fully ordered and coalesced.
+// - Segments in area 1 precede and can't coalesce with segments in area 2.
+// - Segments in Spills precede and can't coalesce with segments in area 2.
+// - No coalescing is possible between segments in Spills and segments in area
+// 1, and there are no overlapping segments.
+//
+// The segments in Spills are not ordered with respect to the segments in area
+// 1. They need to be merged.
+//
+// When they exist, Spills.back().start <= LastStart,
+// and WriteI[-1].start <= LastStart.
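+//
+// A rough illustration (not from the original comment): after a sequence
+// of add() calls with non-decreasing start indexes, the segments may be
+// laid out as
+//
+//   area 1 [begin;WriteI) | gap [WriteI;ReadI) | area 2 [ReadI;end)
+//   Spills: new segments that have not fit into the gap yet
+//
+// flush() resizes the gap to match Spills, merges the spilled segments
+// into it, and erases any gap entries that are left over.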
+
+void LiveRangeUpdater::print(raw_ostream &OS) const {
+ if (!isDirty()) {
+ if (LR)
+ OS << "Clean updater: " << *LR << '\n';
+ else
+ OS << "Null updater.\n";
+ return;
+ }
+ assert(LR && "Can't have null LR in dirty updater.");
+ OS << " updater with gap = " << (ReadI - WriteI)
+ << ", last start = " << LastStart
+ << ":\n Area 1:";
+ for (const auto &S : make_range(LR->begin(), WriteI))
+ OS << ' ' << S;
+ OS << "\n Spills:";
+ for (unsigned I = 0, E = Spills.size(); I != E; ++I)
+ OS << ' ' << Spills[I];
+ OS << "\n Area 2:";
+ for (const auto &S : make_range(ReadI, LR->end()))
+ OS << ' ' << S;
+ OS << '\n';
+}
+
+LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const {
+ print(errs());
+}
+
+// Determine if A and B should be coalesced.
+static inline bool coalescable(const LiveRange::Segment &A,
+ const LiveRange::Segment &B) {
+ assert(A.start <= B.start && "Unordered live segments.");
+ if (A.end == B.start)
+ return A.valno == B.valno;
+ if (A.end < B.start)
+ return false;
+ assert(A.valno == B.valno && "Cannot overlap different values");
+ return true;
+}
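+
+// Examples (illustrative): with the same value number, [0,8) and [8,16)
+// coalesce because they touch, and [0,12) and [8,16) coalesce because they
+// overlap; [0,8) and [12,16) do not because of the gap between them, and
+// touching segments with different value numbers do not coalesce either.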
+
+void LiveRangeUpdater::add(LiveRange::Segment Seg) {
+ assert(LR && "Cannot add to a null destination");
+
+ // Fall back to the regular add method if the live range
+ // is using the segment set instead of the segment vector.
+ if (LR->segmentSet != nullptr) {
+ LR->addSegmentToSet(Seg);
+ return;
+ }
+
+ // Flush the state if Start moves backwards.
+ if (!LastStart.isValid() || LastStart > Seg.start) {
+ if (isDirty())
+ flush();
+ // This brings us to an uninitialized state. Reinitialize.
+ assert(Spills.empty() && "Leftover spilled segments");
+ WriteI = ReadI = LR->begin();
+ }
+
+ // Remember start for next time.
+ LastStart = Seg.start;
+
+ // Advance ReadI until it ends after Seg.start.
+ LiveRange::iterator E = LR->end();
+ if (ReadI != E && ReadI->end <= Seg.start) {
+ // First try to close the gap between WriteI and ReadI with spills.
+ if (ReadI != WriteI)
+ mergeSpills();
+ // Then advance ReadI.
+ if (ReadI == WriteI)
+ ReadI = WriteI = LR->find(Seg.start);
+ else
+ while (ReadI != E && ReadI->end <= Seg.start)
+ *WriteI++ = *ReadI++;
+ }
+
+ assert(ReadI == E || ReadI->end > Seg.start);
+
+ // Check if the ReadI segment begins early.
+ if (ReadI != E && ReadI->start <= Seg.start) {
+ assert(ReadI->valno == Seg.valno && "Cannot overlap different values");
+ // Bail if Seg is completely contained in ReadI.
+ if (ReadI->end >= Seg.end)
+ return;
+ // Coalesce into Seg.
+ Seg.start = ReadI->start;
+ ++ReadI;
+ }
+
+ // Coalesce as much as possible from ReadI into Seg.
+ while (ReadI != E && coalescable(Seg, *ReadI)) {
+ Seg.end = std::max(Seg.end, ReadI->end);
+ ++ReadI;
+ }
+
+ // Try coalescing Spills.back() into Seg.
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) {
+ Seg.start = Spills.back().start;
+ Seg.end = std::max(Spills.back().end, Seg.end);
+ Spills.pop_back();
+ }
+
+ // Try coalescing Seg into WriteI[-1].
+ if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) {
+ WriteI[-1].end = std::max(WriteI[-1].end, Seg.end);
+ return;
+ }
+
+ // Seg doesn't coalesce with anything, and needs to be inserted somewhere.
+ if (WriteI != ReadI) {
+ *WriteI++ = Seg;
+ return;
+ }
+
+ // Finally, append to LR or Spills.
+ if (WriteI == E) {
+ LR->segments.push_back(Seg);
+ WriteI = ReadI = LR->end();
+ } else
+ Spills.push_back(Seg);
+}
+
+// Merge as many spilled segments as possible into the gap between WriteI
+// and ReadI. Advance WriteI to reflect the inserted segments.
+void LiveRangeUpdater::mergeSpills() {
+ // Perform a backwards merge of Spills and [SpillI;WriteI).
+ size_t GapSize = ReadI - WriteI;
+ size_t NumMoved = std::min(Spills.size(), GapSize);
+ LiveRange::iterator Src = WriteI;
+ LiveRange::iterator Dst = Src + NumMoved;
+ LiveRange::iterator SpillSrc = Spills.end();
+ LiveRange::iterator B = LR->begin();
+
+ // This is the new WriteI position after merging spills.
+ WriteI = Dst;
+
+ // Now merge Src and Spills backwards.
+ while (Src != Dst) {
+ if (Src != B && Src[-1].start > SpillSrc[-1].start)
+ *--Dst = *--Src;
+ else
+ *--Dst = *--SpillSrc;
+ }
+ assert(NumMoved == size_t(Spills.end() - SpillSrc));
+ Spills.erase(SpillSrc, Spills.end());
+}
+
+void LiveRangeUpdater::flush() {
+ if (!isDirty())
+ return;
+ // Clear the dirty state.
+ LastStart = SlotIndex();
+
+ assert(LR && "Cannot add to a null destination");
+
+ // Nothing to merge?
+ if (Spills.empty()) {
+ LR->segments.erase(WriteI, ReadI);
+ LR->verify();
+ return;
+ }
+
+ // Resize the WriteI - ReadI gap to match Spills.
+ size_t GapSize = ReadI - WriteI;
+ if (GapSize < Spills.size()) {
+ // The gap is too small. Make some room.
+ size_t WritePos = WriteI - LR->begin();
+ LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment());
+ // This also invalidated ReadI, but it is recomputed below.
+ WriteI = LR->begin() + WritePos;
+ } else {
+ // Shrink the gap if necessary.
+ LR->segments.erase(WriteI + Spills.size(), ReadI);
+ }
+ ReadI = WriteI + Spills.size();
+ mergeSpills();
+ LR->verify();
+}
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
+ // Create initial equivalence classes.
+ EqClass.clear();
+ EqClass.grow(LR.getNumValNums());
+
+ const VNInfo *used = nullptr, *unused = nullptr;
+
+ // Determine connections.
+ for (const VNInfo *VNI : LR.valnos) {
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ EqClass.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
+ EqClass.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LR.getVNInfoBefore(VNI->def))
+ EqClass.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ EqClass.join(used->id, unused->id);
+
+ EqClass.compress();
+ return EqClass.getNumClasses();
+}
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
+ MachineRegisterInfo &MRI) {
+ // Rewrite instructions.
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = *RI;
+ MachineInstr *MI = RI->getParent();
+ ++RI;
+ // DBG_VALUE instructions don't have slot indexes, so get the index of the
+ // instruction before them.
+ // Normally, DBG_VALUE instructions are removed before this function is
+ // called, but it is not a requirement.
+ SlotIndex Idx;
+ if (MI->isDebugValue())
+ Idx = LIS.getSlotIndexes()->getIndexBefore(*MI);
+ else
+ Idx = LIS.getInstructionIndex(*MI);
+ LiveQueryResult LRQ = LI.Query(Idx);
+ const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+ // In the case of an <undef> use that isn't tied to any def, VNI will be
+ // NULL. If the use is tied to a def, VNI will be the defined value.
+ if (!VNI)
+ continue;
+ if (unsigned EqClass = getEqClass(VNI))
+ MO.setReg(LIV[EqClass-1]->reg);
+ }
+
+ // Distribute subregister liveranges.
+ if (LI.hasSubRanges()) {
+ unsigned NumComponents = EqClass.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ // Create new subranges in the split intervals and construct a mapping
+ // for the VNInfos in the subrange.
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumComponents-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ unsigned ComponentNum;
+ if (VNI.isUnused()) {
+ ComponentNum = 0;
+ } else {
+ const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def);
+ assert(MainRangeVNI != nullptr
+ && "SubRange def must have corresponding main range def");
+ ComponentNum = getEqClass(MainRangeVNI);
+ if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) {
+ SubRanges[ComponentNum-1]
+ = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask);
+ }
+ }
+ VNIMapping.push_back(ComponentNum);
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
+ }
+ LI.removeEmptySubRanges();
+ }
+
+ // Distribute main liverange.
+ DistributeRange(LI, LIV, EqClass);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
new file mode 100644
index 000000000000..5f3281f6771d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -0,0 +1,1575 @@
+//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and computes live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+
+#ifndef NDEBUG
+static cl::opt<bool> EnablePrecomputePhysRegs(
+ "precompute-phys-liveness", cl::Hidden,
+ cl::desc("Eagerly compute live intervals for all physreg units."));
+#else
+static bool EnablePrecomputePhysRegs = false;
+#endif // NDEBUG
+
+static cl::opt<bool> EnableSubRegLiveness(
+ "enable-subreg-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable subregister liveness tracking."));
+
+namespace llvm {
+cl::opt<bool> UseSegmentSetForPhysRegs(
+ "use-segment-set-for-physregs", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use segment set for the computation of the live ranges of physregs."));
+}
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequiredTransitiveID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+ DomTree(nullptr), LRCalc(nullptr) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+ delete LRCalc;
+}
+
+void LiveIntervals::releaseMemory() {
+ // Free the live intervals themselves.
+ for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+ delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ VirtRegIntervals.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
+
+ for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
+ delete RegUnitRanges[i];
+ RegUnitRanges.clear();
+
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+}
+
+/// runOnMachineFunction - calculates LiveIntervals
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+
+ if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness())
+ MRI->enableSubRegLiveness(true);
+
+ if (!LRCalc)
+ LRCalc = new LiveRangeCalc();
+
+ // Allocate space for all virtual registers.
+ VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+ computeVirtRegs();
+ computeRegMasks();
+ computeLiveInRegUnits();
+
+ if (EnablePrecomputePhysRegs) {
+ // For stress testing, precompute live ranges of all physical register
+ // units, including reserved registers.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ getRegUnit(i);
+ }
+ DEBUG(dump());
+ return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+ OS << "********** INTERVALS **********\n";
+
+ // Dump the regunits.
+ for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
+ if (LiveRange *LR = RegUnitRanges[i])
+ OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n';
+
+ // Dump the virtregs.
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (hasInterval(Reg))
+ OS << getInterval(Reg) << '\n';
+ }
+
+ OS << "RegMasks:";
+ for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
+ OS << ' ' << RegMaskSlots[i];
+ OS << '\n';
+
+ printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+ OS << "********** MACHINEINSTRS **********\n";
+ MF->print(OS, Indexes);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void LiveIntervals::dumpInstrs() const {
+ printInstrs(dbgs());
+}
+#endif
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+ float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ?
+ llvm::huge_valf : 0.0F;
+ return new LiveInterval(reg, Weight);
+}
+
+
+/// computeVirtRegInterval - Compute the live interval of a virtual register,
+/// based on defs and uses.
+void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ assert(LI.empty() && "Should only compute empty intervals.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
+ computeDeadValues(LI, nullptr);
+}
+
+void LiveIntervals::computeVirtRegs() {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ createAndComputeVirtRegInterval(Reg);
+ }
+}
+
+void LiveIntervals::computeRegMasks() {
+ RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+ // Find all instructions with regmask operands.
+ for (MachineBasicBlock &MBB : *MF) {
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
+ RMB.first = RegMaskSlots.size();
+
+ // Some block starts, such as EH funclets, create masks.
+ if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
+ for (MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
+ }
+ }
+
+ // Some block ends, such as funclet returns, create masks. Put the mask on
+ // the last instruction of the block, because MBB slot index intervals are
+ // half-open.
+ if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) {
+ assert(!MBB.empty() && "empty return block?");
+ RegMaskSlots.push_back(
+ Indexes->getInstructionIndex(MBB.back()).getRegSlot());
+ RegMaskBits.push_back(Mask);
+ }
+
+ // Compute the number of register mask instructions in this block.
+ RMB.second = RegMaskSlots.size() - RMB.first;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
+
+/// computeRegUnitRange - Compute the live range of a register unit, based
+/// on the uses and defs of aliasing registers. The range should be empty,
+/// or contain only dead phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+ // The physregs aliasing Unit are the roots and their super-registers.
+ // Create all values as dead defs before extending to uses. Note that roots
+ // may share super-registers. That's OK because createDeadDefs() is
+ // idempotent. It is very rare for a register unit to have multiple roots, so
+ // uniquing super-registers is probably not worthwhile.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
+ Supers.isValid(); ++Supers) {
+ if (!MRI->reg_empty(*Supers))
+ LRCalc->createDeadDefs(LR, *Supers);
+ }
+ }
+
+ // Now extend LR to reach all uses.
+ // Ignore uses of reserved registers. We only track defs of those.
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+ for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
+ Supers.isValid(); ++Supers) {
+ unsigned Reg = *Supers;
+ if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LR, Reg);
+ }
+ }
+
+ // Flush the segment set to the segment vector.
+ if (UseSegmentSetForPhysRegs)
+ LR.flushSegmentSet();
+}
+
+
+/// computeLiveInRegUnits - Precompute the live ranges of any register units
+/// that are live-in to an ABI block somewhere. Register values can appear
+/// without a corresponding def when entering the entry block or a landing pad.
+///
+void LiveIntervals::computeLiveInRegUnits() {
+ RegUnitRanges.resize(TRI->getNumRegUnits());
+ DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
+
+ // Keep track of the live range sets allocated.
+ SmallVector<unsigned, 8> NewRanges;
+
+ // Check all basic blocks for live-ins.
+ for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+ MFI != MFE; ++MFI) {
+ const MachineBasicBlock *MBB = &*MFI;
+
+ // We only care about ABI blocks: Entry + landing pads.
+ if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty())
+ continue;
+
+ // Create phi-defs at Begin for all live-in registers.
+ SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
+ for (const auto &LI : MBB->liveins()) {
+ for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = *Units;
+ LiveRange *LR = RegUnitRanges[Unit];
+ if (!LR) {
+ // Use segment set to speed-up initial computation of the live range.
+ LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs);
+ NewRanges.push_back(Unit);
+ }
+ VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator());
+ (void)VNI;
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
+
+ // Compute the 'normal' part of the ranges.
+ for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) {
+ unsigned Unit = NewRanges[i];
+ computeRegUnitRange(*RegUnitRanges[Unit], Unit);
+ }
+}
+
+
+static void createSegmentsForValues(LiveRange &LR,
+ iterator_range<LiveInterval::vni_iterator> VNIs) {
+ for (auto VNI : VNIs) {
+ if (VNI->isUnused())
+ continue;
+ SlotIndex Def = VNI->def;
+ LR.addSegment(LiveRange::Segment(Def, Def.getDeadSlot(), VNI));
+ }
+}
+
+typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList;
+
+static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
+ ShrinkToUsesWorkList &WorkList,
+ const LiveRange &OldRange) {
+ // Keep track of the PHIs that are in use.
+ SmallPtrSet<VNInfo*, 8> UsedPHIs;
+ // Blocks that have already been added to WorkList as live-out.
+ SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+ const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot());
+ SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB);
+
+ // Extend the live range for VNI to be live at Idx.
+ if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) {
+ assert(ExtVNI == VNI && "Unexpected existing value number");
+ (void)ExtVNI;
+ // Is this a PHIDef we haven't seen before?
+ if (!VNI->isPHIDef() || VNI->def != BlockStart ||
+ !UsedPHIs.insert(VNI).second)
+ continue;
+ // The PHI is live, make sure the predecessors are live-out.
+ for (auto &Pred : MBB->predecessors()) {
+ if (!LiveOut.insert(Pred).second)
+ continue;
+ SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop))
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (auto &Pred : MBB->predecessors()) {
+ if (!LiveOut.insert(Pred).second)
+ continue;
+ SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
+ assert(OldRange.getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+}
+
+bool LiveIntervals::shrinkToUses(LiveInterval *li,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can only shrink virtual registers");
+
+ // Shrink subregister live ranges.
+ bool NeedsCleanup = false;
+ for (LiveInterval::SubRange &S : li->subranges()) {
+ shrinkToUses(S, li->reg);
+ if (S.empty())
+ NeedsCleanup = true;
+ }
+ if (NeedsCleanup)
+ li->removeEmptySubRanges();
+
+ // Find all the values used, including PHI kills.
+ ShrinkToUsesWorkList WorkList;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_instr_iterator
+ I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end();
+ I != E; ) {
+ MachineInstr *UseMI = &*(I++);
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
+ LiveQueryResult LRQ = li->Query(Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ if (!VNI) {
+ // This shouldn't happen: readsVirtualRegister returns true, but there is
+ // no live value. It is likely caused by a target getting <undef> flags
+ // wrong.
+ DEBUG(dbgs() << Idx << '\t' << *UseMI
+ << "Warning: Instr claims to read non-existent value in "
+ << *li << '\n');
+ continue;
+ }
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create new live ranges with only minimal live segments per def.
+ LiveRange NewLR;
+ createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
+ extendSegmentsToUses(NewLR, *Indexes, WorkList, *li);
+
+ // Move the trimmed segments back.
+ li->segments.swap(NewLR.segments);
+
+ // Handle dead values.
+ bool CanSeparate = computeDeadValues(*li, dead);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+bool LiveIntervals::computeDeadValues(LiveInterval &LI,
+ SmallVectorImpl<MachineInstr*> *dead) {
+ bool MayHaveSplitComponents = false;
+ for (auto VNI : LI.valnos) {
+ if (VNI->isUnused())
+ continue;
+ SlotIndex Def = VNI->def;
+ LiveRange::iterator I = LI.FindSegmentContaining(Def);
+ assert(I != LI.end() && "Missing segment for VNI");
+
+ // Is the register live before? Otherwise we may have to add a read-undef
+ // flag for subregister defs.
+ unsigned VReg = LI.reg;
+ if (MRI->shouldTrackSubRegLiveness(VReg)) {
+ if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ MI->setRegisterDefReadUndef(VReg);
+ }
+ }
+
+ if (I->end != Def.getDeadSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->markUnused();
+ LI.removeSegment(I);
+ DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n");
+ MayHaveSplitComponents = true;
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(Def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(LI.reg, TRI);
+ if (dead && MI->allDefsAreDead()) {
+ DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
+ dead->push_back(MI);
+ }
+ }
+ }
+ return MayHaveSplitComponents;
+}
+
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
+{
+ DEBUG(dbgs() << "Shrink: " << SR << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(Reg)
+ && "Can only shrink virtual registers");
+ // Find all the values used, including PHI kills.
+ ShrinkToUsesWorkList WorkList;
+
+ // Visit all instructions reading Reg.
+ SlotIndex LastIdx;
+ for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+ MachineInstr *UseMI = MO.getParent();
+ if (UseMI->isDebugValue())
+ continue;
+ // Maybe the operand is for a subregister we don't care about.
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((LaneMask & SR.LaneMask) == 0)
+ continue;
+ }
+ // We only need to visit each instruction once.
+ SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
+ if (Idx == LastIdx)
+ continue;
+ LastIdx = Idx;
+
+ LiveQueryResult LRQ = SR.Query(Idx);
+ VNInfo *VNI = LRQ.valueIn();
+ // For subranges it is possible that only undef values are left in that
+ // part of the subregister, so there is no real live range at the use.
+ if (!VNI)
+ continue;
+
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ if (VNInfo *DefVNI = LRQ.valueDefined())
+ Idx = DefVNI->def;
+
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live range with only minimal live segments per def.
+ LiveRange NewLR;
+ createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+ extendSegmentsToUses(NewLR, *Indexes, WorkList, SR);
+
+ // Move the trimmed ranges back.
+ SR.segments.swap(NewLR.segments);
+
+ // Remove dead PHI value numbers
+ for (auto VNI : SR.valnos) {
+ if (VNI->isUnused())
+ continue;
+ const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
+ assert(Segment != nullptr && "Missing segment for VNI");
+ if (Segment->end != VNI->def.getDeadSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+ VNI->markUnused();
+ SR.removeSegment(*Segment);
+ }
+ }
+
+ DEBUG(dbgs() << "Shrunk: " << SR << '\n');
+}
+
+void LiveIntervals::extendToIndices(LiveRange &LR,
+ ArrayRef<SlotIndex> Indices) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+ LRCalc->extend(LR, Indices[i]);
+}
+
+void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
+ SmallVectorImpl<SlotIndex> *EndPoints) {
+ LiveQueryResult LRQ = LR.Query(Kill);
+ VNInfo *VNI = LRQ.valueOutOrDead();
+ if (!VNI)
+ return;
+
+ MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(KillMBB);
+
+ // If VNI isn't live out from KillMBB, the value is trivially pruned.
+ if (LRQ.endPoint() < MBBEnd) {
+ LR.removeSegment(Kill, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ return;
+ }
+
+ // VNI is live out of KillMBB.
+ LR.removeSegment(Kill, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+
+ // Find all blocks that are reachable from KillMBB without leaving VNI's live
+ // range. It is possible that KillMBB itself is reachable, so start a DFS
+ // from each successor.
+ typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ VisitedTy Visited;
+ for (MachineBasicBlock::succ_iterator
+ SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+ SuccI != SuccE; ++SuccI) {
+ for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+ I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I != E;) {
+ MachineBasicBlock *MBB = *I;
+
+ // Check if VNI is live in to MBB.
+ SlotIndex MBBStart, MBBEnd;
+ std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+ LiveQueryResult LRQ = LR.Query(MBBStart);
+ if (LRQ.valueIn() != VNI) {
+ // This block isn't part of the VNI segment. Prune the search.
+ I.skipChildren();
+ continue;
+ }
+
+ // Prune the search if VNI is killed in MBB.
+ if (LRQ.endPoint() < MBBEnd) {
+ LR.removeSegment(MBBStart, LRQ.endPoint());
+ if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+ I.skipChildren();
+ continue;
+ }
+
+ // VNI is live through MBB.
+ LR.removeSegment(MBBStart, MBBEnd);
+ if (EndPoints) EndPoints->push_back(MBBEnd);
+ ++I;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
+ // Keep track of regunit ranges.
+ SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
+ // Keep track of subregister ranges.
+ SmallVector<std::pair<const LiveInterval::SubRange*,
+ LiveRange::const_iterator>, 4> SRs;
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ const LiveInterval &LI = getInterval(Reg);
+ if (LI.empty())
+ continue;
+
+ // Find the regunit intervals for the assigned register. They may overlap
+ // the virtual register live range, cancelling any kills.
+ RU.clear();
+ for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
+ ++Units) {
+ const LiveRange &RURange = getRegUnit(*Units);
+ if (RURange.empty())
+ continue;
+ RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
+ }
+
+ if (MRI->subRegLivenessEnabled()) {
+ SRs.clear();
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
+ }
+ }
+
+ // Every instruction that kills Reg corresponds to a segment range end
+ // point.
+ for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
+ ++RI) {
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+
+ // Check if any of the regunits are live beyond the end of RI. That could
+ // happen when a physreg is defined as a copy of a virtreg:
+ //
+ // %EAX = COPY %vreg5
+ // FOO %vreg5 <--- MI, cancel kill because %EAX is live.
+ // BAR %EAX<kill>
+ //
+ // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX.
+ for (auto &RUP : RU) {
+ const LiveRange &RURange = *RUP.first;
+ LiveRange::const_iterator &I = RUP.second;
+ if (I == RURange.end())
+ continue;
+ I = RURange.advanceTo(I, RI->end);
+ if (I == RURange.end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI.
+ goto CancelKill;
+ }
+
+ if (MRI->subRegLivenessEnabled()) {
+ // When reading a partial undefined value we must not add a kill flag.
+ // The regalloc might have used the undef lane for something else.
+ // Example:
+ // %vreg1 = ... ; R32: %vreg1
+ // %vreg2:high16 = ... ; R64: %vreg2
+ // = read %vreg2<kill> ; R64: %vreg2
+ // = read %vreg1 ; R32: %vreg1
+ // The <kill> flag is correct for %vreg2, but the register allocator may
+ // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0
+ // are actually never written by %vreg2. After assignment the <kill>
+ // flag at the read instruction is invalid.
+ LaneBitmask DefinedLanesMask;
+ if (!SRs.empty()) {
+ // Compute a mask of lanes that are defined.
+ DefinedLanesMask = 0;
+ for (auto &SRP : SRs) {
+ const LiveInterval::SubRange &SR = *SRP.first;
+ LiveRange::const_iterator &I = SRP.second;
+ if (I == SR.end())
+ continue;
+ I = SR.advanceTo(I, RI->end);
+ if (I == SR.end() || I->start >= RI->end)
+ continue;
+ // I is overlapping RI
+ DefinedLanesMask |= SR.LaneMask;
+ }
+ } else
+ DefinedLanesMask = ~0u;
+
+ bool IsFullWrite = false;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (MO.isUse()) {
+ // Reading any undefined lanes?
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ if ((UseMask & ~DefinedLanesMask) != 0)
+ goto CancelKill;
+ } else if (MO.getSubReg() == 0) {
+ // Writing to the full register?
+ assert(MO.isDef());
+ IsFullWrite = true;
+ }
+ }
+
+ // If an instruction writes to a subregister, a new segment starts in
+ // the LiveInterval. But as this is only overriding part of the register,
+ // adding kill flags is not correct here after registers have been
+ // assigned.
+ if (!IsFullWrite) {
+ // Next segment has to be adjacent in the subregister write case.
+ LiveRange::const_iterator N = std::next(RI);
+ if (N != LI.end() && N->start == RI->end)
+ goto CancelKill;
+ }
+ }
+
+ MI->addRegisterKilled(Reg, nullptr);
+ continue;
+CancelKill:
+ MI->clearRegisterKills(Reg, nullptr);
+ }
+ }
+}
+
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return nullptr;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return nullptr;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : nullptr;
+}
+
+bool
+LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
+ for (const VNInfo *PHI : LI.valnos) {
+ if (PHI->isUnused() || !PHI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def);
+ // Conservatively return true instead of scanning huge predecessor lists.
+ if (PHIMBB->pred_size() > 100)
+ return true;
+ for (MachineBasicBlock::const_pred_iterator
+ PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ return true;
+ }
+ return false;
+}
+
+float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+ const MachineBlockFrequencyInfo *MBFI,
+ const MachineInstr &MI) {
+ BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent());
+ const float Scale = 1.0f / MBFI->getEntryFreq();
+ return (isDef + isUse) * (Freq.getFrequency() * Scale);
+}
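+
+// Worked example (illustrative numbers): an operand that is both read and
+// written (isDef = isUse = true) in a block running at the entry block's
+// frequency gets weight (1 + 1) * 1.0 = 2.0; a pure use in a block at half
+// the entry frequency gets 1 * 0.5 = 0.5.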
+
+LiveRange::Segment
+LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) {
+ LiveInterval& Interval = createEmptyInterval(reg);
+ VNInfo *VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ LiveRange::Segment S(SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst.getParent()), VN);
+ Interval.addSegment(S);
+
+ return S;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
+ return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
+
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range, LI begins after the last call.
+ if (SlotI == SlotE)
+ return false;
+
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(TRI->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
+}
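+
+// Illustration (hypothetical values): with LI segments [10,20)[40,50) and
+// regmask slots {12, 25, 44}, the loop above clears UsableRegs bits for
+// the masks at 12 and 44, which fall inside segments, and skips 25, which
+// lies in the hole between the two segments.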
+
+//===----------------------------------------------------------------------===//
+// IntervalUpdate class.
+//===----------------------------------------------------------------------===//
+
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex OldIdx;
+ SlotIndex NewIdx;
+ SmallPtrSet<LiveRange*, 8> Updated;
+ bool UpdateFlags;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI,
+ SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags)
+ : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx),
+ UpdateFlags(UpdateFlags) {}
+
+ // FIXME: UpdateFlags is a workaround that creates live intervals for all
+ // physregs, even those that aren't needed for regalloc, in order to update
+ // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill
+ // flags, and postRA passes will use a live register utility instead.
+ LiveRange *getRegUnitLI(unsigned Unit) {
+ if (UpdateFlags)
+ return &LIS.getRegUnit(Unit);
+ return LIS.getCachedRegUnit(Unit);
+ }
+
+ /// Update all live ranges touched by MI, assuming a move from OldIdx to
+ /// NewIdx.
+ void updateAllRanges(MachineInstr *MI) {
+ DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+ bool hasRegMask = false;
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isRegMask())
+ hasRegMask = true;
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse()) {
+ if (!MO.readsReg())
+ continue;
+ // Aggressively clear all kill flags.
+ // They are reinserted by VirtRegRewriter.
+ MO.setIsKill(false);
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask) == 0)
+ continue;
+ updateRange(S, Reg, S.LaneMask);
+ }
+ }
+ updateRange(LI, Reg, 0);
+ continue;
+ }
+
+ // For physregs, only update the regunits that actually have a
+ // precomputed live range.
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ if (LiveRange *LR = getRegUnitLI(*Units))
+ updateRange(*LR, *Units, 0);
+ }
+ if (hasRegMask)
+ updateRegMaskSlots();
+ }
+
+private:
+ /// Update a single live range, assuming an instruction has been moved from
+ /// OldIdx to NewIdx.
+ void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
+ if (!Updated.insert(&LR).second)
+ return;
+ DEBUG({
+ dbgs() << " ";
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ dbgs() << PrintReg(Reg);
+ if (LaneMask != 0)
+ dbgs() << " L" << PrintLaneMask(LaneMask);
+ } else {
+ dbgs() << PrintRegUnit(Reg, &TRI);
+ }
+ dbgs() << ":\t" << LR << '\n';
+ });
+ if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+ handleMoveDown(LR);
+ else
+ handleMoveUp(LR, Reg, LaneMask);
+ DEBUG(dbgs() << " -->\t" << LR << '\n');
+ LR.verify();
+ }
+
+ /// Update LR to reflect an instruction has been moved downwards from OldIdx
+ /// to NewIdx (OldIdx < NewIdx).
+ void handleMoveDown(LiveRange &LR) {
+ LiveRange::iterator E = LR.end();
+ // Segment going into OldIdx.
+ LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
+
+ // No value live before or after OldIdx? Nothing to do.
+ if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start))
+ return;
+
+ LiveRange::iterator OldIdxOut;
+ // Do we have a value live-in to OldIdx?
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) {
+ // If the live-in value already extends to NewIdx, there is nothing to do.
+ if (SlotIndex::isEarlierEqualInstr(NewIdx, OldIdxIn->end))
+ return;
+ // Aggressively remove all kill flags from the old kill point.
+ // Kill flags shouldn't be used while live intervals exist, they will be
+ // reinserted by VirtRegRewriter.
+ if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end))
+ for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse())
+ MO->setIsKill(false);
+
+ // Is there a def before NewIdx which is not OldIdx?
+ LiveRange::iterator Next = std::next(OldIdxIn);
+ if (Next != E && !SlotIndex::isSameInstr(OldIdx, Next->start) &&
+ SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // If we are here then OldIdx was just a use but not a def. We only have
+ // to ensure liveness extends to NewIdx.
+ LiveRange::iterator NewIdxIn =
+ LR.advanceTo(Next, NewIdx.getBaseIndex());
+ // Extend the segment before NewIdx if necessary.
+ if (NewIdxIn == E ||
+ !SlotIndex::isEarlierInstr(NewIdxIn->start, NewIdx)) {
+ LiveRange::iterator Prev = std::prev(NewIdxIn);
+ Prev->end = NewIdx.getRegSlot();
+ }
+ return;
+ }
+
+ // Adjust OldIdxIn->end to reach NewIdx. This may temporarily make LR
+ // invalid by overlapping ranges.
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end);
+ OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber());
+ // If this was not a kill, then there was no def and we're done.
+ if (!isKill)
+ return;
+
+ // Did we have a Def at OldIdx?
+ OldIdxOut = Next;
+ if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start))
+ return;
+ } else {
+ OldIdxOut = OldIdxIn;
+ }
+
+ // If we are here then there is a Definition at OldIdx. OldIdxOut points
+ // to the segment starting there.
+ assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) &&
+ "No def?");
+ VNInfo *OldIdxVNI = OldIdxOut->valno;
+ assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def");
+
+ // If the defined value extends beyond NewIdx, just move the beginning
+ // of the segment to NewIdx.
+ SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber());
+ if (SlotIndex::isEarlierInstr(NewIdxDef, OldIdxOut->end)) {
+ OldIdxVNI->def = NewIdxDef;
+ OldIdxOut->start = OldIdxVNI->def;
+ return;
+ }
+
+ // If we are here then we have a Definition at OldIdx which ends before
+ // NewIdx.
+
+ // Is there an existing Def at NewIdx?
+ LiveRange::iterator AfterNewIdx
+ = LR.advanceTo(OldIdxOut, NewIdx.getRegSlot());
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+ if (!OldIdxDefIsDead &&
+ SlotIndex::isEarlierInstr(OldIdxOut->end, NewIdxDef)) {
+ // OldIdx is not a dead def, and NewIdxDef is inside a new interval.
+ VNInfo *DefVNI;
+ if (OldIdxOut != LR.begin() &&
+ !SlotIndex::isEarlierInstr(std::prev(OldIdxOut)->end,
+ OldIdxOut->start)) {
+ // There is no gap between OldIdxOut and its predecessor anymore,
+ // merge them.
+ LiveRange::iterator IPrev = std::prev(OldIdxOut);
+ DefVNI = OldIdxVNI;
+ IPrev->end = OldIdxOut->end;
+ } else {
+ // The value is live in to OldIdx
+ LiveRange::iterator INext = std::next(OldIdxOut);
+ assert(INext != E && "Must have following segment");
+ // We merge OldIdxOut and its successor. As we're dealing with subreg
+ // reordering, there is always a successor to OldIdxOut in the same BB.
+ // We don't need INext->valno anymore and will reuse it for the new
+ // segment we create later.
+ DefVNI = OldIdxVNI;
+ INext->start = OldIdxOut->end;
+ INext->valno->def = INext->start;
+ }
+ // If NewIdx is behind the last segment, extend that and append a new one.
+ if (AfterNewIdx == E) {
+ // OldIdxOut is undef at this point, Slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn -| end
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS -| end
+ std::copy(std::next(OldIdxOut), E, OldIdxOut);
+ // The last segment is undefined now, reuse it for a dead def.
+ LiveRange::iterator NewSegment = std::prev(E);
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ DefVNI);
+ DefVNI->def = NewIdxDef;
+
+ LiveRange::iterator Prev = std::prev(NewSegment);
+ Prev->end = NewIdxDef;
+ } else {
+ // OldIdxOut is undef at this point, Slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn/AfterNewIdx -| |- Next -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- Xn/AfterNewIdx -| |- Next -|
+ std::copy(std::next(OldIdxOut), std::next(AfterNewIdx), OldIdxOut);
+ LiveRange::iterator Prev = std::prev(AfterNewIdx);
+ // We have two cases:
+ if (SlotIndex::isEarlierInstr(Prev->start, NewIdxDef)) {
+ // Case 1: NewIdx is inside a liverange. Split this liverange at
+ // NewIdxDef into the segment "Prev" followed by "NewSegment".
+ LiveRange::iterator NewSegment = AfterNewIdx;
+ *NewSegment = LiveRange::Segment(NewIdxDef, Prev->end, Prev->valno);
+ Prev->valno->def = NewIdxDef;
+
+ *Prev = LiveRange::Segment(Prev->start, NewIdxDef, DefVNI);
+ DefVNI->def = Prev->start;
+ } else {
+ // Case 2: NewIdx is in a lifetime hole. Keep AfterNewIdx as is and
+ // turn Prev into a segment from NewIdx to AfterNewIdx->start.
+ *Prev = LiveRange::Segment(NewIdxDef, AfterNewIdx->start, DefVNI);
+ DefVNI->def = NewIdxDef;
+ assert(DefVNI != AfterNewIdx->valno);
+ }
+ }
+ return;
+ }
+
+ if (AfterNewIdx != E &&
+ SlotIndex::isSameInstr(AfterNewIdx->start, NewIdxDef)) {
+ // There is an existing def at NewIdx. The def at OldIdx is coalesced into
+ // that value.
+ assert(AfterNewIdx->valno != OldIdxVNI && "Multiple defs of value?");
+ LR.removeValNo(OldIdxVNI);
+ } else {
+ // There was no existing def at NewIdx. We need to create a dead def
+ // at NewIdx. Shift segments over the old OldIdxOut segment, this frees
+ // a new segment at the place where we want to construct the dead def.
+ // |- OldIdxOut -| |- X0 -| ... |- Xn -| |- AfterNewIdx -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS. -| |- AfterNewIdx -|
+ assert(AfterNewIdx != OldIdxOut && "Inconsistent iterators");
+ std::copy(std::next(OldIdxOut), AfterNewIdx, OldIdxOut);
+ // We can reuse OldIdxVNI now.
+ LiveRange::iterator NewSegment = std::prev(AfterNewIdx);
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ NewSegmentVNI->def = NewIdxDef;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ }
+ }
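+
+ // Illustrative effect of handleMoveDown (hypothetical slot indices, for
+ // exposition only): moving a def from OldIdx=16 down to NewIdx=32 turns a
+ // segment [16r,48r:0) into [32r,48r:0) when the value is live past NewIdx
+ // (only the start moves), and turns a dead def [16r,16d:0) into a dead def
+ // [32r,32d:0) when nothing else is live there.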
+
+ /// Update LR to reflect an instruction has been moved upwards from OldIdx
+ /// to NewIdx (NewIdx < OldIdx).
+ void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
+ LiveRange::iterator E = LR.end();
+ // Segment going into OldIdx.
+ LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
+
+ // No value live before or after OldIdx? Nothing to do.
+ if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start))
+ return;
+
+ LiveRange::iterator OldIdxOut;
+ // Do we have a value live-in to OldIdx?
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) {
+ // If the live-in value isn't killed here, then we have no Def at
+ // OldIdx, moreover the value must be live at NewIdx so there is nothing
+ // to do.
+ bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end);
+ if (!isKill)
+ return;
+
+ // At this point we have to move OldIdxIn->end back to the nearest
+ // previous use or (dead-)def but no further than NewIdx.
+ SlotIndex DefBeforeOldIdx
+ = std::max(OldIdxIn->start.getDeadSlot(),
+ NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()));
+ OldIdxIn->end = findLastUseBefore(DefBeforeOldIdx, Reg, LaneMask);
+
+ // Did we have a Def at OldIdx? If not we are done now.
+ OldIdxOut = std::next(OldIdxIn);
+ if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start))
+ return;
+ } else {
+ OldIdxOut = OldIdxIn;
+ OldIdxIn = OldIdxOut != LR.begin() ? std::prev(OldIdxOut) : E;
+ }
+
+ // If we are here then there is a Definition at OldIdx. OldIdxOut points
+ // to the segment starting there.
+ assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) &&
+ "No def?");
+ VNInfo *OldIdxVNI = OldIdxOut->valno;
+ assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def");
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+
+ // Is there an existing def at NewIdx?
+ SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber());
+ LiveRange::iterator NewIdxOut = LR.find(NewIdx.getRegSlot());
+ if (SlotIndex::isSameInstr(NewIdxOut->start, NewIdx)) {
+ assert(NewIdxOut->valno != OldIdxVNI &&
+ "Same value defined more than once?");
+ // If OldIdx was a dead def remove it.
+ if (!OldIdxDefIsDead) {
+ // Remove segment starting at NewIdx and move begin of OldIdxOut to
+ // NewIdx so it can take its place.
+ OldIdxVNI->def = NewIdxDef;
+ OldIdxOut->start = NewIdxDef;
+ LR.removeValNo(NewIdxOut->valno);
+ } else {
+ // Simply remove the dead def at OldIdx.
+ LR.removeValNo(OldIdxVNI);
+ }
+ } else {
+ // Previously nothing was live after NewIdx, so all we have to do now is
+ // move the begin of OldIdxOut to NewIdx.
+ if (!OldIdxDefIsDead) {
+ // Do we have any intermediate Defs between OldIdx and NewIdx?
+ if (OldIdxIn != E &&
+ SlotIndex::isEarlierInstr(NewIdxDef, OldIdxIn->start)) {
+ // OldIdx is not a dead def and NewIdx is before predecessor start.
+ LiveRange::iterator NewIdxIn = NewIdxOut;
+ assert(NewIdxIn == LR.find(NewIdx.getBaseIndex()));
+ const SlotIndex SplitPos = NewIdxDef;
+
+ // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
+ *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
+ OldIdxIn->valno);
+ // OldIdxIn and OldIdxVNI are now undef and can be overridden.
+ // We Slide [NewIdxIn, OldIdxIn) down one position.
+ // |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
+ // => |- undef/NewIdxIn -| |- X0 -| ... |- Xn-1 -| |- Xn/OldIdxOut -|
+ std::copy_backward(NewIdxIn, OldIdxIn, OldIdxOut);
+ // NewIdxIn is now considered undef so we can reuse it for the moved
+ // value.
+ LiveRange::iterator NewSegment = NewIdxIn;
+ LiveRange::iterator Next = std::next(NewSegment);
+ if (SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // There is no gap between NewSegment and its predecessor.
+ *NewSegment = LiveRange::Segment(Next->start, SplitPos,
+ Next->valno);
+ *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+ Next->valno->def = SplitPos;
+ } else {
+ // There is a gap between NewSegment and its predecessor
+ // Value becomes live in.
+ *NewSegment = LiveRange::Segment(SplitPos, Next->start, OldIdxVNI);
+ NewSegment->valno->def = SplitPos;
+ }
+ } else {
+ // Leave the end point of a live def.
+ OldIdxOut->start = NewIdxDef;
+ OldIdxVNI->def = NewIdxDef;
+ if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))
+ OldIdxIn->end = NewIdx.getRegSlot();
+ }
+ } else {
+ // OldIdxVNI is a dead def. It may have been moved across other values
+ // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
+ // down one position.
+ // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next - |
+ // => |- undef/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -|
+ std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
+ // OldIdxVNI can be reused now to build a new dead def segment.
+ LiveRange::iterator NewSegment = NewIdxOut;
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ NewSegmentVNI->def = NewIdxDef;
+ }
+ }
+ }
+
+ void updateRegMaskSlots() {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
+ "No RegMask at OldIdx.");
+ *RI = NewIdx.getRegSlot();
+ assert((RI == LIS.RegMaskSlots.begin() ||
+ SlotIndex::isEarlierInstr(*std::prev(RI), *RI)) &&
+ "Cannot move regmask instruction above another call");
+ assert((std::next(RI) == LIS.RegMaskSlots.end() ||
+ SlotIndex::isEarlierInstr(*RI, *std::next(RI))) &&
+ "Cannot move regmask instruction below another call");
+ }
+
+ // Return the last use of Reg between Before and OldIdx, or Before if there
+ // is no such use.
+ SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
+ LaneBitmask LaneMask) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ SlotIndex LastUse = Before;
+ for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0 && LaneMask != 0
+ && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0)
+ continue;
+
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot.getRegSlot();
+ }
+ return LastUse;
+ }
+
+ // This is a regunit interval, so scanning the use list could be very
+ // expensive. Scan upwards from OldIdx instead.
+ assert(Before < OldIdx && "Expected upwards move");
+ SlotIndexes *Indexes = LIS.getSlotIndexes();
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Before);
+
+ // OldIdx may not correspond to an instruction any longer, so set MII to
+ // point to the next instruction after OldIdx, or MBB->end().
+ MachineBasicBlock::iterator MII = MBB->end();
+ if (MachineInstr *MI = Indexes->getInstructionFromIndex(
+ Indexes->getNextNonNullIndex(OldIdx)))
+ if (MI->getParent() == MBB)
+ MII = MI;
+
+ MachineBasicBlock::iterator Begin = MBB->begin();
+ while (MII != Begin) {
+ if ((--MII)->isDebugValue())
+ continue;
+ SlotIndex Idx = Indexes->getInstructionIndex(*MII);
+
+ // Stop searching when Before is reached.
+ if (!SlotIndex::isEarlierInstr(Before, Idx))
+ return Before;
+
+ // Check if MII uses Reg.
+ for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
+ if (MO->isReg() && !MO->isUndef() &&
+ TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ TRI.hasRegUnit(MO->getReg(), Reg))
+ return Idx.getRegSlot();
+ }
+ // Didn't reach Before. It must be the first instruction in the block.
+ return Before;
+ }
+};
+
+void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
+ assert(!MI.isBundled() && "Can't handle bundled instructions yet.");
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ Indexes->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI.getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI.getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(&MI);
+}
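+
+// Illustrative caller sketch (assumed client such as a machine scheduler, not
+// part of this file; InsertPos is a hypothetical iterator): after physically
+// moving MI within its basic block, the pass must notify LiveIntervals so the
+// slot index maps and live ranges stay consistent:
+//   MBB->remove(&MI);
+//   MBB->insert(InsertPos, &MI);
+//   LIS->handleMove(MI, /*UpdateFlags=*/false);
+// The move must stay within one basic block, as asserted above.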
+
+void LiveIntervals::handleMoveIntoBundle(MachineInstr &MI,
+ MachineInstr &BundleStart,
+ bool UpdateFlags) {
+ SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
+ SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+ HME.updateAllRanges(&MI);
+}
+
+void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
+ const MachineBasicBlock::iterator End,
+ const SlotIndex endIdx,
+ LiveRange &LR, const unsigned Reg,
+ LaneBitmask LaneMask) {
+ LiveInterval::iterator LII = LR.find(endIdx);
+ SlotIndex lastUseIdx;
+ if (LII != LR.end() && LII->start < endIdx)
+ lastUseIdx = LII->end;
+ else
+ --LII;
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr &MI = *I;
+ if (MI.isDebugValue())
+ continue;
+
+ SlotIndex instrIdx = getInstructionIndex(MI);
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ bool isEndValid = getInstructionFromIndex(LII->end);
+
+ // FIXME: This doesn't currently handle early-clobber or multiple removed
+ // defs inside of the region to repair.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end();
+ OI != OE; ++OI) {
+ const MachineOperand &MO = *OI;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if ((Mask & LaneMask) == 0)
+ continue;
+
+ if (MO.isDef()) {
+ if (!isStartValid) {
+ if (LII->end.isDead()) {
+ SlotIndex prevStart;
+ if (LII != LR.begin())
+ prevStart = std::prev(LII)->start;
+
+ // FIXME: This could be more efficient if there was a
+ // removeSegment method that returned an iterator.
+ LR.removeSegment(*LII, true);
+ if (prevStart.isValid())
+ LII = LR.find(prevStart);
+ else
+ LII = LR.begin();
+ } else {
+ LII->start = instrIdx.getRegSlot();
+ LII->valno->def = instrIdx.getRegSlot();
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ continue;
+ }
+ }
+
+ if (!lastUseIdx.isValid()) {
+ VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator);
+ LiveRange::Segment S(instrIdx.getRegSlot(),
+ instrIdx.getDeadSlot(), VNI);
+ LII = LR.addSegment(S);
+ } else if (LII->start != instrIdx.getRegSlot()) {
+ VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator);
+ LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI);
+ LII = LR.addSegment(S);
+ }
+
+ if (MO.getSubReg() && !MO.isUndef())
+ lastUseIdx = instrIdx.getRegSlot();
+ else
+ lastUseIdx = SlotIndex();
+ } else if (MO.isUse()) {
+ // FIXME: This should probably be handled outside of this branch,
+ // either as part of the def case (for defs inside of the region) or
+ // after the loop over the region.
+ if (!isEndValid && !LII->end.isBlock())
+ LII->end = instrIdx.getRegSlot();
+ if (!lastUseIdx.isValid())
+ lastUseIdx = instrIdx.getRegSlot();
+ }
+ }
+ }
+}
+
+void
+LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ ArrayRef<unsigned> OrigRegs) {
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin))
+ --Begin;
+ while (End != MBB->end() && !Indexes->hasIndex(*End))
+ ++End;
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ else
+ endIdx = getInstructionIndex(*End);
+
+ Indexes->repairIndexesInRange(MBB, Begin, End);
+
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr &MI = *I;
+ if (MI.isDebugValue())
+ continue;
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end();
+ MOI != MOE; ++MOI) {
+ if (MOI->isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ !hasInterval(MOI->getReg())) {
+ createAndComputeVirtRegInterval(MOI->getReg());
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
+ unsigned Reg = OrigRegs[i];
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ LiveInterval &LI = getInterval(Reg);
+ // FIXME: Should we support undefs that gain defs?
+ if (!LI.hasAtLeastOneValue())
+ continue;
+
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
+ }
+ repairOldRegInRange(Begin, End, endIdx, LI, Reg);
+ }
+}
+
+void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (LiveRange *LR = getCachedRegUnit(*Units))
+ if (VNInfo *VNI = LR->getVNInfoAt(Pos))
+ LR->removeValNo(VNI);
+ }
+}
+
+void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
+ VNInfo *VNI = LI.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ return;
+ LI.removeValNo(VNI);
+
+ // Also remove the value in subranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (VNInfo *SVNI = S.getVNInfoAt(Pos))
+ S.removeValNo(SVNI);
+ }
+ LI.removeEmptySubRanges();
+}
+
+void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
+ SmallVectorImpl<LiveInterval*> &SplitLIs) {
+ ConnectedVNInfoEqClasses ConEQ(*this);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp <= 1)
+ return;
+ DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
+ unsigned Reg = LI.reg;
+ const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+ for (unsigned I = 1; I < NumComp; ++I) {
+ unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ LiveInterval &NewLI = createEmptyInterval(NewVReg);
+ SplitLIs.push_back(&NewLI);
+ }
+ ConEQ.Distribute(LI, SplitLIs.data(), *MRI);
+}
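+
+// Illustrative caller sketch (assumed client, for exposition only): after
+// edits that may disconnect a virtual register's live range, the split-off
+// intervals are typically collected and re-processed like so:
+//   SmallVector<LiveInterval*, 8> SplitLIs;
+//   LIS->splitSeparateComponents(LI, SplitLIs);
+//   for (LiveInterval *SplitLI : SplitLIs)
+//     enqueueForAllocation(SplitLI->reg); // hypothetical helper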
+
+void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) {
+ assert(LRCalc && "LRCalc not initialized.");
+ LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+ LRCalc->constructMainRangeFromSubranges(LI);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..025d99ce7881
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,205 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
+ if (Range.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveRange::const_iterator RegPos = Range.begin();
+ LiveRange::const_iterator RegEnd = Range.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ while (SegPos.valid()) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+
+ // We have reached the end of Segments, so it is no longer necessary to search
+ // for the insertion position.
+ // It is faster to insert the end first.
+ --RegEnd;
+ SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg);
+ for (; RegPos != RegEnd; ++RegPos, ++SegPos)
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
+ if (Range.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveRange::const_iterator RegPos = Range.begin();
+ LiveRange::const_iterator RegEnd = Range.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = Range.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
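+
+// Illustrative sketch (assumed register-allocator client, for exposition
+// only): a physical register is commonly modelled as one LiveIntervalUnion
+// per register unit, so assigning and evicting a virtual register reduce to:
+//   Unions[Unit].unify(VirtReg, VirtReg);   // assign
+//   Unions[Unit].extract(VirtReg, VirtReg); // evict
+// where Unions is a hypothetical per-unit array kept by the allocator.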
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Collect virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The query state is one of:
+//
+// 1. CheckedFirstInterference == false: Iterators are uninitialized.
+// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
+// 3. Iterators left at the last seen intersection.
+//
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ // Fast path return if we already have the desired information.
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+
+ // Set up iterators on the first call.
+ if (!CheckedFirstInterference) {
+ CheckedFirstInterference = true;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ SeenAllInterferences = true;
+ return 0;
+ }
+
+ // In most cases, the union will start before VirtReg.
+ VirtRegI = VirtReg->begin();
+ LiveUnionI.setMap(LiveUnion->getMap());
+ LiveUnionI.find(VirtRegI->start);
+ }
+
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentReg = nullptr;
+ while (LiveUnionI.valid()) {
+ assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+
+ // Check for overlapping interference.
+ while (VirtRegI->start < LiveUnionI.stop() &&
+ VirtRegI->end > LiveUnionI.start()) {
+ // This is an overlap, record the interfering register.
+ LiveInterval *VReg = LiveUnionI.value();
+ if (VReg != RecentReg && !isSeenInterference(VReg)) {
+ RecentReg = VReg;
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
+ }
+ // This LiveUnion segment is no longer interesting.
+ if (!(++LiveUnionI).valid()) {
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+ }
+ }
+
+ // The iterators are now not overlapping, LiveUnionI has been advanced
+ // beyond VirtRegI.
+ assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+
+ // Advance the iterator that ends first.
+ VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
+ if (VirtRegI == VirtRegEnd)
+ break;
+
+ // Detect overlap, handle above.
+ if (VirtRegI->start < LiveUnionI.stop())
+ continue;
+
+ // Still not overlapping. Catch up LiveUnionI.
+ LiveUnionI.advanceTo(VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
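+
+// Illustrative sketch (assumed caller, for exposition only; Q is a Query that
+// has already been set up for a virtual register and one union, and
+// MaxCandidates is a hypothetical limit): allocators usually cap the search,
+//   if (Q.collectInterferingVRegs(MaxCandidates) >= MaxCandidates)
+//     return false; // too much interference, give up on this register
+// and only inspect the collected interferences when eviction looks viable.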
+
+void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
+ unsigned NSize) {
+ // Reuse existing allocation.
+ if (NSize == Size)
+ return;
+ clear();
+ Size = NSize;
+ LIUs = static_cast<LiveIntervalUnion*>(
+ malloc(sizeof(LiveIntervalUnion)*NSize));
+ for (unsigned i = 0; i != Size; ++i)
+ new(LIUs + i) LiveIntervalUnion(Alloc);
+}
+
+void LiveIntervalUnion::Array::clear() {
+ if (!LIUs)
+ return;
+ for (unsigned i = 0; i != Size; ++i)
+ LIUs[i].~LiveIntervalUnion();
+ free(LIUs);
+ Size = 0;
+ LIUs = nullptr;
+}
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
new file mode 100644
index 000000000000..4e2528f47568
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -0,0 +1,189 @@
+//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LivePhysRegs utility for tracking liveness of
+// physical registers across machine instructions in forward or backward order.
+// A more detailed description can be found in the corresponding header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+/// \brief Remove all registers from the set that get clobbered by the register
+/// mask.
+/// The clobbers set will be the list of live registers clobbered
+/// by the regmask.
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
+ SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) {
+ SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
+ while (LRI != LiveRegs.end()) {
+ if (MO.clobbersPhysReg(*LRI)) {
+ if (Clobbers)
+ Clobbers->push_back(std::make_pair(*LRI, &MO));
+ LRI = LiveRegs.erase(LRI);
+ } else
+ ++LRI;
+ }
+}
+
+/// Simulates liveness when stepping backwards over an instruction (bundle):
+/// Remove Defs, add uses. This is the recommended way of calculating liveness.
+void LivePhysRegs::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ if (!O->isDef())
+ continue;
+ unsigned Reg = O->getReg();
+ if (Reg == 0)
+ continue;
+ removeReg(Reg);
+ } else if (O->isRegMask())
+ removeRegsInMask(*O, nullptr);
+ }
+
+ // Add uses to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->readsReg())
+ continue;
+ unsigned Reg = O->getReg();
+ if (Reg == 0)
+ continue;
+ addReg(Reg);
+ }
+}
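+
+// Illustrative sketch of the recommended backward scan (assumed client pass,
+// for exposition only): seed the set with the block's live-outs, then step
+// over each instruction in reverse,
+//   LivePhysRegs LiveRegs(TRI);
+//   LiveRegs.addLiveOuts(MBB);
+//   for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+//        I != E; ++I)
+//     LiveRegs.stepBackward(*I);
+// after which LiveRegs holds the registers live before the last visited
+// instruction.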
+
+/// Simulates liveness when stepping forward over an instruction (bundle):
+/// remove killed uses, add defs. This is not the recommended way, because it
+/// depends on accurate kill flags. If possible, use stepBackward() instead of
+/// this function.
+void LivePhysRegs::stepForward(const MachineInstr &MI,
+ SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) {
+ // Remove killed registers from the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ unsigned Reg = O->getReg();
+ if (Reg == 0)
+ continue;
+ if (O->isDef()) {
+ // Note, dead defs are still recorded. The caller should decide how to
+ // handle them.
+ Clobbers.push_back(std::make_pair(Reg, &*O));
+ } else {
+ if (!O->isKill())
+ continue;
+ assert(O->isUse());
+ removeReg(Reg);
+ }
+ } else if (O->isRegMask())
+ removeRegsInMask(*O, &Clobbers);
+ }
+
+ // Add defs to the set.
+ for (auto Reg : Clobbers) {
+ // Skip dead defs. They shouldn't be added to the set.
+ if (Reg.second->isReg() && Reg.second->isDead())
+ continue;
+ addReg(Reg.first);
+ }
+}
+
+/// Print the currently live registers to OS.
+void LivePhysRegs::print(raw_ostream &OS) const {
+ OS << "Live Registers:";
+ if (!TRI) {
+ OS << " (uninitialized)\n";
+ return;
+ }
+
+ if (empty()) {
+ OS << " (empty)\n";
+ return;
+ }
+
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ OS << " " << PrintReg(*I, TRI);
+ OS << "\n";
+}
+
+/// Dumps the currently live registers to the debug output.
+LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " " << *this;
+#endif
+}
+
+bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ if (LiveRegs.count(Reg))
+ return false;
+ if (MRI.isReserved(Reg))
+ return false;
+ for (MCRegAliasIterator R(Reg, TRI, false); R.isValid(); ++R) {
+ if (LiveRegs.count(*R))
+ return false;
+ }
+ return true;
+}
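+
+// Illustrative sketch (assumed client, for exposition only): a pass searching
+// for a free scratch register at the current point of a backward scan can
+// combine the tracked set with this query,
+//   for (unsigned CandReg : Candidates) // hypothetical candidate order
+//     if (LiveRegs.available(MRI, CandReg))
+//       return CandReg;
+// which also rejects reserved registers and aliases of live registers.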
+
+/// Add live-in registers of basic block \p MBB to \p LiveRegs.
+static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ LiveRegs.addReg(LI.PhysReg);
+}
+
+/// Add pristine registers to the given \p LiveRegs: all callee-saved registers
+/// are added first, then the registers that are actually saved by the function
+/// (and therefore clobbered) are removed again.
+static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
+ const MachineFrameInfo &MFI,
+ const TargetRegisterInfo &TRI) {
+ for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ LiveRegs.addReg(*CSR);
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ LiveRegs.removeReg(Info.getReg());
+}
+
+void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ ::addLiveIns(*this, *Succ);
+}
+
+void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ if (MBB.isReturnBlock()) {
+ // The return block has no successors whose live-ins we could merge
+ // below. So instead we add the callee saved registers manually.
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ } else {
+ addPristines(*this, MF, MFI, *TRI);
+ }
+ }
+
+ addLiveOutsNoPristines(MBB);
+}
+
+void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addPristines(*this, MF, MFI, *TRI);
+ ::addLiveIns(*this, MBB);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
new file mode 100644
index 000000000000..db91ca113dc1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -0,0 +1,484 @@
+//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the LiveRangeCalc class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+void LiveRangeCalc::resetLiveOutMap() {
+ unsigned NumBlocks = MF->getNumBlockIDs();
+ Seen.clear();
+ Seen.resize(NumBlocks);
+ Map.resize(NumBlocks);
+}
+
+void LiveRangeCalc::reset(const MachineFunction *mf,
+ SlotIndexes *SI,
+ MachineDominatorTree *MDT,
+ VNInfo::Allocator *VNIA) {
+ MF = mf;
+ MRI = &MF->getRegInfo();
+ Indexes = SI;
+ DomTree = MDT;
+ Alloc = VNIA;
+ resetLiveOutMap();
+ LiveIn.clear();
+}
+
+
+static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
+ LiveRange &LR, const MachineOperand &MO) {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex DefIdx =
+ Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber());
+
+ // Create the def in LR. This may find an existing def.
+ LR.createDeadDef(DefIdx, Alloc);
+}
+
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Step 1: Create minimal live segments for every definition of Reg.
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // createDeadDef() will deduplicate.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ unsigned Reg = LI.reg;
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
+ LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+
+ // If this is the first time we see a subregister def, initialize
+ // subranges by creating a copy of the main range.
+ if (!LI.hasSubRanges() && !LI.empty()) {
+ LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+ LI.createSubRangeFrom(*Alloc, ClassMask, LI);
+ }
+
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ // A Mask for subregs common to the existing subrange and current def.
+ LaneBitmask Common = S.LaneMask & Mask;
+ if (Common == 0)
+ continue;
+ // A Mask for subregs covered by the subrange but not the current def.
+ LaneBitmask LRest = S.LaneMask & ~Mask;
+ LiveInterval::SubRange *CommonRange;
+ if (LRest != 0) {
+ // Split current subrange into Common and LRest ranges.
+ S.LaneMask = LRest;
+ CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
+ } else {
+ assert(Common == S.LaneMask);
+ CommonRange = &S;
+ }
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
+ Mask &= ~Common;
+ }
+ // Create a new SubRange for subregs we did not cover yet.
+ if (Mask != 0) {
+ LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, *NewRange, MO);
+ }
+ }
+
+ // Create the def in the main liverange. We do not have to do this if
+ // subranges are tracked as we recreate the main range later in this case.
+ if (MO.isDef() && !LI.hasSubRanges())
+ createDeadDef(*Indexes, *Alloc, LI, MO);
+ }
+
+ // We may have created empty live ranges for partially undefined uses; we
+ // can't keep them because we won't find defs in them later.
+ LI.removeEmptySubRanges();
+
+ // Step 2: Extend live segments to all uses, constructing SSA form as
+ // necessary.
+ if (LI.hasSubRanges()) {
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ resetLiveOutMap();
+ extendToUses(S, Reg, S.LaneMask);
+ }
+ LI.clear();
+ constructMainRangeFromSubranges(LI);
+ } else {
+ resetLiveOutMap();
+ extendToUses(LI, Reg, ~0u);
+ }
+}
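+
+// Illustrative lane-mask sketch for the subrange splitting above (hypothetical
+// masks, for exposition only): with an existing subrange S.LaneMask = 0x3 and
+// a def writing Mask = 0x1, Common = 0x1 and LRest = 0x2, so S keeps its
+// segments under the reduced mask 0x2 while a copy with mask 0x1 is created
+// to receive the new dead def.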
+
+void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
+ // First create dead defs at all defs found in subranges.
+ LiveRange &MainRange = LI;
+ assert(MainRange.segments.empty() && MainRange.valnos.empty() &&
+ "Expect empty main liverange");
+
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ for (const VNInfo *VNI : SR.valnos) {
+ if (!VNI->isUnused() && !VNI->isPHIDef())
+ MainRange.createDeadDef(VNI->def, *Alloc);
+ }
+ }
+
+ resetLiveOutMap();
+ extendToUses(MainRange, LI.reg);
+}
+
+void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
+ assert(MRI && Indexes && "call reset() first");
+
+ // Visit all def operands. If the same instruction has multiple defs of Reg,
+ // LR.createDeadDef() will deduplicate.
+ for (MachineOperand &MO : MRI->def_operands(Reg))
+ createDeadDef(*Indexes, *Alloc, LR, MO);
+}
+
+
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
+ LaneBitmask Mask) {
+ // Visit all operands that read Reg. This may include partial defs.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ // Clear all kill flags. They will be reinserted after register allocation
+ // by LiveIntervalAnalysis::addKillFlags().
+ if (MO.isUse())
+ MO.setIsKill(false);
+ else {
+ // We only care about uses, but on the main range (mask ~0u) this includes
+ // the "virtual" reads happening for subregister defs.
+ if (Mask != ~0u)
+ continue;
+ }
+
+ if (!MO.readsReg())
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ // Ignore uses not covering the current subrange.
+ if ((SubRegMask & Mask) == 0)
+ continue;
+ }
+
+ // Determine the actual place of the use.
+ const MachineInstr *MI = MO.getParent();
+ unsigned OpNo = (&MO - &MI->getOperand(0));
+ SlotIndex UseIdx;
+ if (MI->isPHI()) {
+ assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
+ // The actual place where a phi operand is used is the end of the pred
+ // MBB. PHI operands are paired: (Reg, PredMBB).
+ UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
+ } else {
+ // Check for early-clobber redefs.
+ bool isEarlyClobber = false;
+ unsigned DefIdx;
+ if (MO.isDef())
+ isEarlyClobber = MO.isEarlyClobber();
+ else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
+ // FIXME: This would be a lot easier if tied early-clobber uses also
+ // had an early-clobber flag.
+ isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber();
+ }
+ UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber);
+ }
+
+ // MI is reading Reg. We may have visited MI before if it happens to be
+ // reading Reg multiple times. That is OK, extend() is idempotent.
+ extend(LR, UseIdx, Reg);
+ }
+}
+
+
+void LiveRangeCalc::updateFromLiveIns() {
+ LiveRangeUpdater Updater;
+ for (const LiveInBlock &I : LiveIn) {
+ if (!I.DomNode)
+ continue;
+ MachineBasicBlock *MBB = I.DomNode->getBlock();
+ assert(I.Value && "No live-in value found");
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(MBB);
+
+ if (I.Kill.isValid())
+ // Value is killed inside this block.
+ End = I.Kill;
+ else {
+ // The value is live-through, update LiveOut as well.
+ // Defer the Domtree lookup until it is needed.
+ assert(Seen.test(MBB->getNumber()));
+ Map[MBB] = LiveOutPair(I.Value, nullptr);
+ }
+ Updater.setDest(&I.LR);
+ Updater.add(Start, End, I.Value);
+ }
+ LiveIn.clear();
+}
+
+
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
+ assert(Use.isValid() && "Invalid SlotIndex");
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot());
+ assert(UseMBB && "No MBB at Use");
+
+ // Is there a def in the same MBB we can extend?
+ if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use))
+ return;
+
+ // Find the single reaching def, or determine if Use is jointly dominated by
+ // multiple values, and we may need to create even more phi-defs to preserve
+ // VNInfo SSA form. Perform a search for all predecessor blocks where we
+ // know the dominating VNInfo.
+ if (findReachingDefs(LR, *UseMBB, Use, PhysReg))
+ return;
+
+ // When there were multiple different values, we may need new PHIs.
+ calculateValues();
+}
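+
+// Illustrative caller sketch (assumed client, for exposition only; LRCalc,
+// UseIdx and friends are hypothetical names): after inserting a new use of a
+// virtual register, its live range can be grown to the use like so:
+//   LRCalc.reset(&MF, Indexes, DomTree, &VNIAlloc);
+//   LRCalc.extend(LI, UseIdx.getRegSlot());
+// This reuses a value defined earlier in the same block when possible and
+// otherwise searches reaching defs, inserting PHI-defs as needed.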
+
+
+// This function is called by a client after using the low-level API to add
+// live-out and live-in blocks. The unique value optimization is not
+// available, SplitEditor::transferValues handles that case directly anyway.
+void LiveRangeCalc::calculateValues() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+ updateSSA();
+ updateFromLiveIns();
+}
+
+
+bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
+ SlotIndex Use, unsigned PhysReg) {
+ unsigned UseMBBNum = UseMBB.getNumber();
+
+ // Block numbers where LR should be live-in.
+ SmallVector<unsigned, 16> WorkList(1, UseMBBNum);
+
+ // Remember if we have seen more than one value.
+ bool UniqueVNI = true;
+ VNInfo *TheVNI = nullptr;
+
+ // Using Seen as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
+
+#ifndef NDEBUG
+ if (MBB->pred_empty()) {
+ MBB->getParent()->verify();
+ errs() << "Use of " << PrintReg(PhysReg)
+ << " does not have a corresponding definition on every path:\n";
+ const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
+ if (MI != nullptr)
+ errs() << Use << " " << *MI;
+ llvm_unreachable("Use not jointly dominated by defs.");
+ }
+
+ if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ !MBB->isLiveIn(PhysReg)) {
+ MBB->getParent()->verify();
+ errs() << "The register " << PrintReg(PhysReg)
+ << " needs to be live in to BB#" << MBB->getNumber()
+ << ", but is missing from the live-in list.\n";
+ llvm_unreachable("Invalid global physical register");
+ }
+#endif
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+
+ // Is this a known live-out block?
+ if (Seen.test(Pred->getNumber())) {
+ if (VNInfo *VNI = Map[Pred].first) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ }
+ continue;
+ }
+
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(Pred);
+
+ // First time we see Pred. Try to determine the live-out value, but set
+ // it as null if Pred is live-through with an unknown value.
+ VNInfo *VNI = LR.extendInBlock(Start, End);
+ setLiveOutValue(Pred, VNI);
+ if (VNI) {
+ if (TheVNI && TheVNI != VNI)
+ UniqueVNI = false;
+ TheVNI = VNI;
+ continue;
+ }
+
+ // No, we need a live-in value for Pred as well
+ if (Pred != &UseMBB)
+ WorkList.push_back(Pred->getNumber());
+ else
+ // Loopback to UseMBB, so value is really live through.
+ Use = SlotIndex();
+ }
+ }
+
+ LiveIn.clear();
+
+ // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
+ // neither requires it. Skip the sorting overhead for small updates.
+ if (WorkList.size() > 4)
+ array_pod_sort(WorkList.begin(), WorkList.end());
+
+ // If a unique reaching def was found, blit in the live ranges immediately.
+ if (UniqueVNI) {
+ LiveRangeUpdater Updater(&LR);
+ for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(),
+ E = WorkList.end(); I != E; ++I) {
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(*I);
+ // Trim the live range in UseMBB.
+ if (*I == UseMBBNum && Use.isValid())
+ End = Use;
+ else
+ Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr);
+ Updater.add(Start, End, TheVNI);
+ }
+ return true;
+ }
+
+ // Multiple values were found, so transfer the work list to the LiveIn array
+ // where UpdateSSA will use it as a work list.
+ LiveIn.reserve(WorkList.size());
+ for (SmallVectorImpl<unsigned>::const_iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ addLiveInBlock(LR, DomTree->getNode(MBB));
+ if (MBB == &UseMBB)
+ LiveIn.back().Kill = Use;
+ }
+
+ return false;
+}
+
+
+// This is essentially the same iterative algorithm that SSAUpdater uses,
+// except we already have a dominator tree, so we don't have to recompute it.
+void LiveRangeCalc::updateSSA() {
+ assert(Indexes && "Missing SlotIndexes");
+ assert(DomTree && "Missing dominator tree");
+
+ // Iterate until convergence.
+ unsigned Changes;
+ do {
+ Changes = 0;
+ // Propagate live-out values down the dominator tree, inserting phi-defs
+ // when necessary.
+ for (LiveInBlock &I : LiveIn) {
+ MachineDomTreeNode *Node = I.DomNode;
+ // Skip block if the live-in value has already been determined.
+ if (!Node)
+ continue;
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber());
+
+ // IDom dominates all of our predecessors, but it may not be their
+ // immediate dominator. Check if any of them have live-out values that are
+ // properly dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ IDomValue = Map[IDom->getBlock()];
+
+ // Cache the DomTree node that defined the value.
+ if (IDomValue.first && !IDomValue.second)
+ Map[IDom->getBlock()].second = IDomValue.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def));
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair &Value = Map[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+
+ // Cache the DomTree node that defined the value.
+ if (!Value.second)
+ Value.second =
+ DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def));
+
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (DomTree->dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
+
+ // The value may be live-through even if Kill is set, as can happen when
+ // we are called from extendRange. In that case LiveOutSeen is true, and
+ // LiveOut indicates a foreign or missing value.
+ LiveOutPair &LOP = Map[MBB];
+
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ assert(Alloc && "Need VNInfo allocator to create PHI-defs");
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(MBB);
+ LiveRange &LR = I.LR;
+ VNInfo *VNI = LR.getNextValue(Start, *Alloc);
+ I.Value = VNI;
+ // This block is done, we know the final value.
+ I.DomNode = nullptr;
+
+ // Add liveness since updateFromLiveIns now skips this node.
+ if (I.Kill.isValid())
+ LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
+ else {
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ LOP = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value.
+ I.Value = IDomValue.first;
+
+ // If the IDomValue is killed in the block, don't propagate through.
+ if (I.Kill.isValid())
+ continue;
+
+ // Propagate IDomValue if it isn't killed:
+ // MBB is live-out and doesn't define its own value.
+ if (LOP.first == IDomValue.first)
+ continue;
+ ++Changes;
+ LOP = IDomValue;
+ }
+ }
+ } while (Changes);
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
new file mode 100644
index 000000000000..9de48b722881
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -0,0 +1,248 @@
+//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeCalc class can be used to compute live ranges from scratch. It
+// caches information about values in the CFG to speed up repeated operations
+// on the same live range. The cache can be shared by non-overlapping live
+// ranges. SplitKit uses that when computing the live range of split products.
+//
+// A low-level interface is available to clients that know where a variable is
+// live, but don't know which value it has at every point. LiveRangeCalc will
+// propagate values down the dominator tree, and even insert PHI-defs where
+// needed. SplitKit uses this faster interface when possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
+#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Forward declarations for MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+class LiveRangeCalc {
+ const MachineFunction *MF;
+ const MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ MachineDominatorTree *DomTree;
+ VNInfo::Allocator *Alloc;
+
+ /// LiveOutPair - A value and the block that defined it. The domtree node is
+ /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+
+ /// LiveOutMap - Map basic blocks to the value leaving the block.
+ typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+ /// Bit vector of active entries in LiveOut, also used as a visited set by
+ /// findReachingDefs. One entry per basic block, indexed by block number.
+ /// This is kept as a separate bit vector because it can be cleared quickly
+ /// when switching live ranges.
+ BitVector Seen;
+
+ /// Map each basic block where a live range is live out to the live-out value
+ /// and its defining block.
+ ///
+ /// For every basic block, MBB, one of these conditions shall be true:
+ ///
+ /// 1. !Seen.count(MBB->getNumber())
+ /// Blocks without a Seen bit are ignored.
+ /// 2. LiveOut[MBB].second.getNode() == MBB
+ /// The live-out value is defined in MBB.
+ /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
+ /// The live-out value passes through MBB. All predecessors must carry
+ /// the same value.
+ ///
+ /// The domtree node may be null, it can be computed.
+ ///
+ /// The map can be shared by multiple live ranges as long as no two are
+ /// live-out of the same block.
+ LiveOutMap Map;
+
+ /// LiveInBlock - Information about a basic block where a live range is known
+ /// to be live-in, but the value has not yet been determined.
+ struct LiveInBlock {
+ // The live range set that is live-in to this block. The algorithms can
+ // handle multiple non-overlapping live ranges simultaneously.
+ LiveRange &LR;
+
+ // DomNode - Dominator tree node for the block.
+ // Cleared when the final value has been determined and LI has been updated.
+ MachineDomTreeNode *DomNode;
+
+ // Position in block where the live-in range ends, or SlotIndex() if the
+ // range passes through the block. When the final value has been
+ // determined, the range from the block start to Kill will be added to LI.
+ SlotIndex Kill;
+
+ // Live-in value filled in by updateSSA once it is known.
+ VNInfo *Value;
+
+ LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
+ : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {}
+ };
+
+ /// LiveIn - Work list of blocks where the live-in value has yet to be
+ /// determined. This list is typically computed by findReachingDefs() and
+ /// used as a work list by updateSSA(). The low-level interface may also be
+ /// used to add entries directly.
+ SmallVector<LiveInBlock, 16> LiveIn;
+
+ /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can
+ /// reach it.
+ ///
+ /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB
+ /// are added to @p LR, and the function returns true.
+ ///
+ /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
+ /// live in are added to the LiveIn array, and the function returns false.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
+ SlotIndex Kill, unsigned PhysReg);
+
+ /// updateSSA - Compute the values that will be live in to all requested
+ /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
+ ///
+ /// Every live-in block must be jointly dominated by the added live-out
+ /// blocks. No values are read from the live ranges.
+ void updateSSA();
+
+ /// Transfer information from the LiveIn vector to the live ranges and update
+ /// the given @p LiveOuts.
+ void updateFromLiveIns();
+
+ /// Extend the live range of @p LR to reach all uses of Reg.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask);
+
+ /// Reset Map and Seen fields.
+ void resetLiveOutMap();
+
+public:
+ LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr),
+ DomTree(nullptr), Alloc(nullptr) {}
+
+ //===--------------------------------------------------------------------===//
+ // High-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Calculate live ranges from scratch.
+ //
+
+ /// reset - Prepare caches for a new set of non-overlapping live ranges. The
+ /// caches must be reset before attempting calculations with a live range
+ /// that may overlap a previously computed live range, and before the first
+ /// live range in a function. If live ranges are not known to be
+ /// non-overlapping, call reset before each.
+ void reset(const MachineFunction *MF,
+ SlotIndexes*,
+ MachineDominatorTree*,
+ VNInfo::Allocator*);
+
+ //===--------------------------------------------------------------------===//
+ // Mid-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // Modify existing live ranges.
+ //
+
+ /// Extend the live range of @p LR to reach @p Use.
+ ///
+ /// The existing values in @p LR must be live so they jointly dominate @p Use.
+ /// If @p Use is not dominated by a single existing value, PHI-defs are
+ /// inserted as required to preserve SSA form.
+ ///
+ /// PhysReg, when set, is used to verify live-in lists on basic blocks.
+ void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0);
+
+ /// createDeadDefs - Create a dead def in LI for every def operand of Reg.
+ /// Each instruction defining Reg gets a new VNInfo with a corresponding
+ /// minimal live range.
+ void createDeadDefs(LiveRange &LR, unsigned Reg);
+
+ /// Extend the live range of @p LR to reach all uses of Reg.
+ ///
+ /// All uses must be jointly dominated by existing liveness. PHI-defs are
+ /// inserted as needed to preserve SSA form.
+ void extendToUses(LiveRange &LR, unsigned PhysReg) {
+ extendToUses(LR, PhysReg, ~0u);
+ }
+
+ /// Calculates liveness for the register specified in live interval @p LI.
+ /// Creates subregister live ranges as needed if subreg liveness tracking is
+ /// enabled.
+ void calculate(LiveInterval &LI, bool TrackSubRegs);
+
+ /// For live interval \p LI with correct SubRanges construct matching
+ /// information for the main live range. Expects the main live range to not
+ /// have any segments or value numbers.
+ void constructMainRangeFromSubranges(LiveInterval &LI);
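+
+ // Editorial sketch (not part of the original header): how the high- and
+ // mid-level interface above is typically driven. Indexes, DomTree and Alloc
+ // stand for the analysis results this object is normally reset with, and the
+ // MachineRegisterInfo query is assumed from its usual API.
+ //
+ //   LRCalc.reset(&MF, Indexes, DomTree, &Alloc);
+ //   LRCalc.calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
+ //   // Later, extend an existing range so it covers one more use:
+ //   LRCalc.extend(LR, UseIdx.getRegSlot());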
+
+ //===--------------------------------------------------------------------===//
+ // Low-level interface.
+ //===--------------------------------------------------------------------===//
+ //
+ // These functions can be used to compute live ranges where the live-in and
+ // live-out blocks are already known, but the SSA value in each block is
+ // unknown.
+ //
+ // After calling reset(), add known live-out values and known live-in blocks.
+ // Then call calculateValues() to compute the actual value that is
+ // live-in to each block, and add liveness to the live ranges.
+ //
+
+ /// setLiveOutValue - Indicate that VNI is live out from MBB. The
+ /// calculateValues() function will not add liveness for MBB; the caller
+ /// should take care of that.
+ ///
+ /// VNI may be null only if MBB is a live-through block also passed to
+ /// addLiveInBlock().
+ void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
+ Seen.set(MBB->getNumber());
+ Map[MBB] = LiveOutPair(VNI, nullptr);
+ }
+
+ /// addLiveInBlock - Add a block with an unknown live-in value. This
+ /// function can only be called once per basic block. Once the live-in value
+ /// has been determined, calculateValues() will add liveness to LR.
+ ///
+ /// @param LR The live range that is live-in to the block.
+ /// @param DomNode The domtree node for the block.
+ /// @param Kill Index in block where LR is killed. If the value is
+ /// live-through, set Kill = SlotIndex() and also call
+ /// setLiveOutValue(MBB, nullptr).
+ void addLiveInBlock(LiveRange &LR,
+ MachineDomTreeNode *DomNode,
+ SlotIndex Kill = SlotIndex()) {
+ LiveIn.push_back(LiveInBlock(LR, DomNode, Kill));
+ }
+
+ /// calculateValues - Calculate the value that will be live-in to each block
+ /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
+ /// form. Add liveness to all live-in blocks up to the Kill point, or the
+ /// whole block for live-through blocks.
+ ///
+ /// Every predecessor of a live-in block must have been given a value with
+ /// setLiveOutValue; the value may be null for live-through blocks.
+ void calculateValues();
+};
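+
+// Editorial sketch (not from the original header): the call sequence for the
+// low-level interface documented above. DefMBB, UseMBB, ThruMBB, DefVNI and
+// KillIdx are placeholders.
+//
+//   LiveRangeCalc LRC;
+//   LRC.reset(&MF, Indexes, DomTree, &Alloc);
+//   // Known live-out value in the defining block.
+//   LRC.setLiveOutValue(DefMBB, DefVNI);
+//   // A block where the value is live-in and killed at KillIdx.
+//   LRC.addLiveInBlock(LR, DomTree->getNode(UseMBB), KillIdx);
+//   // A live-through block: unknown live-in value, no kill.
+//   LRC.setLiveOutValue(ThruMBB, nullptr);
+//   LRC.addLiveInBlock(LR, DomTree->getNode(ThruMBB));
+//   // Resolve the SSA values and fill in the live ranges.
+//   LRC.calculateValues();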
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..b35c0adfacad
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,449 @@
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
+STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
+STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
+
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ if (VRM) {
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
+ LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ return LI;
+}
+
+unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
+ unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ if (VRM) {
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
+ return VReg;
+}
+
+bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
+ const MachineInstr *DefMI,
+ AliasAnalysis *aa) {
+ assert(DefMI && "Missing instruction");
+ ScannedRemattable = true;
+ if (!TII.isTriviallyReMaterializable(*DefMI, aa))
+ return false;
+ Remattable.insert(VNI);
+ return true;
+}
+
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
+ for (VNInfo *VNI : getParent().valnos) {
+ if (VNI->isUnused())
+ continue;
+ unsigned Original = VRM->getOriginal(getReg());
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
+ if (!DefMI)
+ continue;
+ checkRematerializable(OrigVNI, DefMI, aa);
+ }
+ ScannedRemattable = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
+ if (!ScannedRemattable)
+ scanRemattable(aa);
+ return !Remattable.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) const {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = UseIdx.getRegSlot(true);
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+ continue;
+
+ // We can't remat physreg uses unless the physreg is constant.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
+ continue;
+ return false;
+ }
+
+ LiveInterval &li = LIS.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
+ SlotIndex UseIdx, bool cheapAsAMove) {
+ assert(ScannedRemattable && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!Remattable.count(OrigVNI))
+ return false;
+
+ // The defining instruction must be provided by the caller in RM.OrigMI.
+ SlotIndex DefIdx;
+ assert(RM.OrigMI && "No defining instruction for remattable value");
+ DefIdx = LIS.getInstructionIndex(*RM.OrigMI);
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !TII.isAsCheapAsAMove(*RM.OrigMI))
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ const TargetRegisterInfo &tri,
+ bool Late) {
+ assert(RM.OrigMI && "Invalid remat");
+ TII.reMaterialize(MBB, MI, DestReg, 0, *RM.OrigMI, tri);
+ // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
+ // to false anyway in case the isDead flag of RM.OrigMI's dest register
+ // is true.
+ (*--MI).getOperand(0).setIsDead(false);
+ Rematted.insert(RM.ParentVNI);
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot();
+}
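+
+// Editorial sketch of the rematerialization flow above, roughly as a spiller
+// would drive it. The Remat setup follows the declaration in LiveRangeEdit.h
+// (ParentVNI set at construction, OrigMI filled in by the caller); UseMI,
+// UseIdx, NewReg and DefMI are placeholders.
+//
+//   if (Edit.anyRematerializable(AA)) {
+//     LiveRangeEdit::Remat RM(ParentVNI);
+//     RM.OrigMI = DefMI;
+//     if (Edit.canRematerializeAt(RM, OrigVNI, UseIdx, /*cheapAsAMove=*/true)) {
+//       SlotIndex DefIdx = Edit.rematerializeAt(*UseMI->getParent(), UseMI,
+//                                               NewReg, RM, TRI);
+//       (void)DefIdx; // location of the new def, used to extend NewReg's range
+//     }
+//   }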
+
+void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
+ if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg))
+ LIS.removeInterval(Reg);
+}
+
+bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr*> &Dead) {
+ MachineInstr *DefMI = nullptr, *UseMI = nullptr;
+
+ // Check that there is a single def and a single use.
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MO.isDef()) {
+ if (DefMI && DefMI != MI)
+ return false;
+ if (!MI->canFoldAsLoad())
+ return false;
+ DefMI = MI;
+ } else if (!MO.isUndef()) {
+ if (UseMI && UseMI != MI)
+ return false;
+ // FIXME: Targets don't know how to fold subreg uses.
+ if (MO.getSubReg())
+ return false;
+ UseMI = MI;
+ }
+ }
+ if (!DefMI || !UseMI)
+ return false;
+
+ // Since we're moving the DefMI load, make sure we're not extending any live
+ // ranges.
+ if (!allUsesAvailableAt(DefMI, LIS.getInstructionIndex(*DefMI),
+ LIS.getInstructionIndex(*UseMI)))
+ return false;
+
+ // We also need to make sure it is safe to move the load.
+ // Assume there are stores between DefMI and UseMI.
+ bool SawStore = true;
+ if (!DefMI->isSafeToMove(nullptr, SawStore))
+ return false;
+
+ DEBUG(dbgs() << "Try to fold single def: " << *DefMI
+ << " into single use: " << *UseMI);
+
+ SmallVector<unsigned, 8> Ops;
+ if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ return false;
+
+ MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);
+ if (!FoldMI)
+ return false;
+ DEBUG(dbgs() << " folded: " << *FoldMI);
+ LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
+ UseMI->eraseFromParent();
+ DefMI->addRegisterDead(LI->reg, nullptr);
+ Dead.push_back(DefMI);
+ ++NumDCEFoldedLoads;
+ return true;
+}
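+
+// Editorial illustration (machine IR shown schematically, x86-flavoured):
+// foldAsLoad looks for a single-def / single-use pair such as
+//
+//   %vreg1 = MOV32rm <fi#0>          ; the only def, a foldable load
+//   %vreg2 = ADD32rr %vreg0, %vreg1  ; the only use
+//
+// and, when the target can fold the memory operand, rewrites it to
+//
+//   %vreg2 = ADD32rm %vreg0, <fi#0>
+//
+// leaving the now-dead load on the Dead list for eliminateDeadDefs().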
+
+bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
+ const MachineOperand &MO) const {
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (LI.Query(Idx).isKill())
+ return true;
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned SubReg = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill())
+ return true;
+ }
+ return false;
+}
+
+/// Find all live intervals that need to shrink, then remove the instruction.
+void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
+ AliasAnalysis *AA) {
+ assert(MI->allDefsAreDead() && "Def isn't really dead");
+ SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+
+ // Never delete a bundled instruction.
+ if (MI->isBundled()) {
+ return;
+ }
+ // Never delete inline asm.
+ if (MI->isInlineAsm()) {
+ DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
+ return;
+ }
+
+ // Use the same criteria as DeadMachineInstructionElim.
+ bool SawStore = false;
+ if (!MI->isSafeToMove(nullptr, SawStore)) {
+ DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
+ return;
+ }
+
+ DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
+
+ // Collect virtual registers to be erased after MI is gone.
+ SmallVector<unsigned, 8> RegsToErase;
+ bool ReadsPhysRegs = false;
+ bool isOrigDef = false;
+ unsigned Dest;
+ if (VRM && MI->getOperand(0).isReg()) {
+ Dest = MI->getOperand(0).getReg();
+ unsigned Original = VRM->getOriginal(Dest);
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
+ // The original live-range may have been shrunk to an empty live-range. This
+ // happens when the value is dead, but the interval is kept around so that
+ // other values which depend on it can still be rematerialized.
+ if (OrigVNI)
+ isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
+ }
+
+ // Check for live intervals that may shrink
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check if MI reads any unreserved physregs.
+ if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+ ReadsPhysRegs = true;
+ else if (MOI->isDef())
+ LIS.removePhysRegDefAt(Reg, Idx);
+ continue;
+ }
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // Shrink read registers, unless it is likely to be expensive and
+ // unlikely to change anything. We typically don't want to shrink the
+ // PIC base register that has lots of uses everywhere.
+ // Always shrink COPY uses that probably come from live range splitting.
+ if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) ||
+ (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI))))
+ ToShrink.insert(&LI);
+
+ // Remove defined value.
+ if (MOI->isDef()) {
+ if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ LIS.removeVRegDefAt(LI, Idx);
+ if (LI.empty())
+ RegsToErase.push_back(Reg);
+ }
+ }
+
+ // Currently, we don't support DCE of physreg live ranges. If MI reads
+ // any unreserved physregs, don't erase the instruction, but turn it into
+ // a KILL instead. This way, the physreg live ranges don't end up
+ // dangling.
+ // FIXME: It would be better to have something like shrinkToUses() for
+ // physregs. That could potentially enable more DCE and it would free up
+ // the physreg. It would not happen often, though.
+ if (ReadsPhysRegs) {
+ MI->setDesc(TII.get(TargetOpcode::KILL));
+ // Remove all operands that aren't physregs.
+ for (unsigned i = MI->getNumOperands(); i; --i) {
+ const MachineOperand &MO = MI->getOperand(i-1);
+ if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ MI->RemoveOperand(i-1);
+ }
+ DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
+ } else {
+ // If the dest of MI is an original reg and MI is reMaterializable,
+ // don't delete the inst. Replace the dest with a new reg, and keep
+ // the inst for remat of other siblings. The inst is saved in
+ // LiveRangeEdit::DeadRemats and will be deleted after all the
+ // allocations of the func are done.
+ if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
+ LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
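+ // This appears to undo the NewRegs entry that createEmptyIntervalFrom just
+ // added via the MRI delegate: the temporary register only keeps the remat
+ // source alive and is not meant to be queued for allocation again.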
+ pop_back();
+ markDeadRemat(MI);
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
+ MI->getOperand(0).setIsDead(true);
+ } else {
+ if (TheDelegate)
+ TheDelegate->LRE_WillEraseInstruction(MI);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ ++NumDCEDeleted;
+ }
+ }
+
+ // Erase any virtregs that are now empty and unused. There may be <undef>
+ // uses around. Keep the empty live range in that case.
+ for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) {
+ unsigned Reg = RegsToErase[i];
+ if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) {
+ ToShrink.remove(&LIS.getInterval(Reg));
+ eraseVirtReg(Reg);
+ }
+ }
+}
+
+void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled,
+ AliasAnalysis *AA) {
+ ToShrinkSet ToShrink;
+
+ for (;;) {
+ // Erase all dead defs.
+ while (!Dead.empty())
+ eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA);
+
+ if (ToShrink.empty())
+ break;
+
+ // Shrink just one live interval. Then delete new dead defs.
+ LiveInterval *LI = ToShrink.back();
+ ToShrink.pop_back();
+ if (foldAsLoad(LI, Dead))
+ continue;
+ unsigned VReg = LI->reg;
+ if (TheDelegate)
+ TheDelegate->LRE_WillShrinkVirtReg(VReg);
+ if (!LIS.shrinkToUses(LI, &Dead))
+ continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also, they currently aren't spilled, so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (VReg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
+
+ // LI may have been separated, create new intervals.
+ LI->RenumberValues();
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(*LI, SplitLIs);
+ if (!SplitLIs.empty())
+ ++NumFracRanges;
+
+ unsigned Original = VRM ? VRM->getOriginal(VReg) : 0;
+ for (const LiveInterval *SplitLI : SplitLIs) {
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (Original != VReg && Original != 0)
+ VRM->setIsSplitFromReg(SplitLI->reg, Original);
+ if (TheDelegate)
+ TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg);
+ }
+ }
+}
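+
+// Editorial sketch: how a caller (e.g. a spiller) typically drives the DCE
+// loop above. Edit, AA and DeadCopy are placeholders; RegsBeingSpilled is
+// left empty here.
+//
+//   SmallVector<MachineInstr *, 8> DeadInsts;
+//   DeadInsts.push_back(DeadCopy);
+//   Edit.eliminateDeadDefs(DeadInsts, /*RegsBeingSpilled=*/None, AA);
+//   // On return DeadInsts is empty; any intervals separated while shrinking
+//   // have been split into new registers and the delegate was notified.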
+
+// Keep track of new virtual registers created via
+// MachineRegisterInfo::createVirtualRegister.
+void LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) {
+ if (VRM)
+ VRM->grow();
+
+ NewRegs.push_back(VReg);
+}
+
+void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
+ const MachineLoopInfo &Loops,
+ const MachineBlockFrequencyInfo &MBFI) {
+ VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI);
+ for (unsigned I = 0, Size = size(); I < Size; ++I) {
+ LiveInterval &LI = LIS.getInterval(get(I));
+ if (MRI.recomputeRegClass(LI.reg))
+ DEBUG({
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ dbgs() << "Inflated " << PrintReg(LI.reg) << " to "
+ << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
+ });
+ VRAI.calculateSpillWeightAndHint(LI);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm/lib/CodeGen/LiveRangeUtils.h
new file mode 100644
index 000000000000..bd57609c3d84
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -0,0 +1,62 @@
+//===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This file contains helper functions to modify live ranges.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
+#define LLVM_LIB_CODEGEN_LIVERANGEUTILS_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+
+namespace llvm {
+
+/// Helper function that distributes live range value numbers and the
+/// corresponding segments of a master live range \p LR to a list of newly
+/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in
+/// \p LR to 0, meaning it stays in \p LR, or to 1..N, meaning it moves to the
+/// corresponding live range in the \p SplitLRs array.
+template<typename LiveRangeT, typename EqClassesT>
+static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[],
+ EqClassesT VNIClasses) {
+ // Move segments to new intervals.
+ typename LiveRangeT::iterator J = LR.begin(), E = LR.end();
+ while (J != E && VNIClasses[J->valno->id] == 0)
+ ++J;
+ for (typename LiveRangeT::iterator I = J; I != E; ++I) {
+ if (unsigned eq = VNIClasses[I->valno->id]) {
+ assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ SplitLRs[eq-1]->segments.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LR.segments.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LR.getNumValNums();
+ while (j != e && VNIClasses[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (unsigned eq = VNIClasses[i]) {
+ VNI->id = SplitLRs[eq-1]->getNumValNums();
+ SplitLRs[eq-1]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LR.valnos[j++] = VNI;
+ }
+ }
+ LR.valnos.resize(j);
+}
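+
+// Editorial example of the mapping described above: for a live range with four
+// value numbers and VNIClasses = {0, 1, 1, 2},
+//
+//   LiveInterval *SplitLRs[] = {&NewLR0, &NewLR1};
+//   DistributeRange(LR, SplitLRs, VNIClasses);
+//
+// value number 0 and its segments stay in LR, value numbers 1 and 2 move to
+// NewLR0, value number 3 moves to NewLR1, and the ids in every range are
+// renumbered densely from 0.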
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
new file mode 100644
index 000000000000..7ee87c1e650f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -0,0 +1,197 @@
+//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LiveRegMatrix analysis pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumAssigned, "Number of registers assigned");
+STATISTIC(NumUnassigned, "Number of registers unassigned");
+
+char LiveRegMatrix::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
+ "Live Register Matrix", false, false)
+
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
+ UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+
+void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.addRequiredTransitive<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+
+ unsigned NumRegUnits = TRI->getNumRegUnits();
+ if (NumRegUnits != Matrix.size())
+ Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]);
+ Matrix.init(LIUAlloc, NumRegUnits);
+
+ // Make sure no stale queries get reused.
+ invalidateVirtRegs();
+ return false;
+}
+
+void LiveRegMatrix::releaseMemory() {
+ for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
+ Matrix[i].clear();
+ // No need to clear Queries here, since LiveIntervalUnion::Query doesn't
+ // have anything important to clear and LiveRegMatrix's runOnMachineFunction()
+ // does a std::unique_ptr::reset anyway.
+ }
+}
+
+template<typename Callable>
+bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval,
+ unsigned PhysReg, Callable Func) {
+ if (VRegInterval.hasSubRanges()) {
+ for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ unsigned Unit = (*Units).first;
+ LaneBitmask Mask = (*Units).second;
+ for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
+ if (S.LaneMask & Mask) {
+ if (Func(Unit, S))
+ return true;
+ break;
+ }
+ }
+ }
+ } else {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (Func(*Units, VRegInterval))
+ return true;
+ }
+ }
+ return false;
+}
+
+void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << ':');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+
+ foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range);
+ Matrix[Unit].unify(VirtReg, Range);
+ return false;
+ });
+
+ ++NumAssigned;
+ DEBUG(dbgs() << '\n');
+}
+
+void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+ unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg);
+
+ foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI));
+ Matrix[Unit].extract(VirtReg, Range);
+ return false;
+ });
+
+ ++NumUnassigned;
+ DEBUG(dbgs() << '\n');
+}
+
+bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
+ for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
+ if (!Matrix[*Unit].empty())
+ return true;
+ }
+ return false;
+}
+
+bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check if the cached information is valid.
+ // The same BitVector can be reused for all PhysRegs.
+ // We could cache multiple VirtRegs if it becomes necessary.
+ if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg;
+ RegMaskTag = UserTag;
+ RegMaskUsable.clear();
+ LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
+ }
+
+ // The BitVector is indexed by PhysReg, not register unit.
+ // Regmask interference is more fine grained than regunits.
+ // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8.
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
+}
+
+bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ if (VirtReg.empty())
+ return false;
+ CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
+
+ bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
+ const LiveRange &Range) {
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
+ return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes());
+ });
+ return Result;
+}
+
+LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+ unsigned RegUnit) {
+ LiveIntervalUnion::Query &Q = Queries[RegUnit];
+ Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ return Q;
+}
+
+LiveRegMatrix::InterferenceKind
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+ if (VirtReg.empty())
+ return IK_Free;
+
+ // Regmask interference is the fastest check.
+ if (checkRegMaskInterference(VirtReg, PhysReg))
+ return IK_RegMask;
+
+ // Check for fixed interference.
+ if (checkRegUnitInterference(VirtReg, PhysReg))
+ return IK_RegUnit;
+
+ // Check the matrix for virtual register interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (query(VirtReg, *Units).checkInterference())
+ return IK_VirtReg;
+
+ return IK_Free;
+}
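+
+// Editorial sketch: the way a register allocator typically consults this
+// analysis. Matrix, VirtReg and PhysReg come from the allocator's own state.
+//
+//   switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+//   case LiveRegMatrix::IK_Free:
+//     Matrix->assign(VirtReg, PhysReg); // no conflict, commit the assignment
+//     break;
+//   case LiveRegMatrix::IK_VirtReg:
+//     // Some other virtual register is in the way; it could be evicted with
+//     // Matrix->unassign() and this assignment retried.
+//     break;
+//   default: // IK_RegMask or IK_RegUnit: fixed interference, try another reg.
+//     break;
+//   }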
diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 000000000000..dbf1f96102d1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,88 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "livestacks"
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequiredTransitive<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+ // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
+ S2IMap.clear();
+ S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getSubtarget().getRegisterInfo();
+ // FIXME: No analysis is being done right now. We are relying on the
+ // register allocators to provide the information.
+ return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(
+ TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+ .first;
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
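+
+// Editorial sketch: how spill code typically registers a slot with this
+// analysis (the getVNInfoAllocator() accessor is assumed from
+// LiveStackAnalysis.h; Slot and RC come from the caller).
+//
+//   LiveInterval &SI = LSS->getOrCreateInterval(Slot, RC);
+//   if (!SI.hasAtLeastOneValue())
+//     SI.getNextValue(SlotIndex(), LSS->getVNInfoAllocator());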
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+ OS << "********** INTERVALS **********\n";
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ I->second.print(OS);
+ int Slot = I->first;
+ const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+ if (RC)
+ OS << " [" << TRI->getRegClassName(RC) << "]\n";
+ else
+ OS << " [Unknown]\n";
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 000000000000..dd87216f5e6b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,810 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariables analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredID(UnreachableMachineBlockElimID);
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return nullptr;
+}
+
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " Alive in blocks: ";
+ for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+ E = AliveBlocks.end(); I != E; ++I)
+ dbgs() << *I << ", ";
+ dbgs() << "\n Killed by:";
+ if (Kills.empty())
+ dbgs() << " No instructions.\n";
+ else {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ dbgs() << "\n #" << i << ": " << *Kills[i];
+ dbgs() << "\n";
+ }
+#endif
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+ assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(RegIdx);
+ return VirtRegInfo[RegIdx];
+}
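+
+// Editorial sketch: typical queries a client makes against the information
+// this pass computes (Reg is a virtual register, MBB a block of interest).
+//
+//   LiveVariables &LV = getAnalysis<LiveVariables>();
+//   LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+//   if (VI.AliveBlocks.test(MBB->getNumber())) {
+//     // Reg is live-through MBB.
+//   } else if (MachineInstr *Kill = VI.findKill(MBB)) {
+//     // Reg dies in MBB; Kill is its last use there.
+//   }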
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB,
+ std::vector<MachineBasicBlock*> &WorkList) {
+ unsigned BBNum = MBB->getNumber();
+
+ // Check to see if this basic block is one of the killing blocks. If so,
+ // remove it.
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ if (VRInfo.Kills[i]->getParent() == MBB) {
+ VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+ break;
+ }
+
+ if (MBB == DefBlock) return; // Terminate recursion
+
+ if (VRInfo.AliveBlocks.test(BBNum))
+ return; // We already know the block is live
+
+ // Mark the variable known alive in this bb
+ VRInfo.AliveBlocks.set(BBNum);
+
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
+ WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+ MachineBasicBlock *DefBlock,
+ MachineBasicBlock *MBB) {
+ std::vector<MachineBasicBlock*> WorkList;
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+ while (!WorkList.empty()) {
+ MachineBasicBlock *Pred = WorkList.back();
+ WorkList.pop_back();
+ MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+ }
+}
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+ MachineInstr &MI) {
+ assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+ unsigned BBNum = MBB->getNumber();
+
+ VarInfo& VRInfo = getVarInfo(reg);
+
+ // Check to see if this basic block is already a kill block.
+ if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+ // Yes, this register is killed in this basic block already. Increase the
+ // live range by updating the kill instruction.
+ VRInfo.Kills.back() = &MI;
+ return;
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+ assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+ // This situation can occur:
+ //
+ // ,------.
+ // | |
+ // | v
+ // | t2 = phi ... t1 ...
+ // | |
+ // | v
+ // | t1 = ...
+ // | ... = ... t1 ...
+ // | |
+ // `------'
+ //
+ // where there is a use in a PHI node that's a predecessor to the defining
+ // block. We don't want to mark all predecessors as having the value "alive"
+ // in this case.
+ if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+ // Add a new kill entry for this basic block. If this virtual register is
+ // already marked as alive in this basic block, that means it is alive in at
+ // least one of the successor blocks, it's not a kill.
+ if (!VRInfo.AliveBlocks.test(BBNum))
+ VRInfo.Kills.push_back(&MI);
+
+ // Update all dominating blocks to mark them as "known live".
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ E = MBB->pred_end(); PI != E; ++PI)
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) {
+ VarInfo &VRInfo = getVarInfo(Reg);
+
+ if (VRInfo.AliveBlocks.empty())
+ // If vr is not alive in any block, then defaults to dead.
+ VRInfo.Kills.push_back(&MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+ SmallSet<unsigned,4> &PartDefRegs) {
+ unsigned LastDefReg = 0;
+ unsigned LastDefDist = 0;
+ MachineInstr *LastDef = nullptr;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (!Def)
+ continue;
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastDefDist) {
+ LastDefReg = SubReg;
+ LastDef = Def;
+ LastDefDist = Dist;
+ }
+ }
+
+ if (!LastDef)
+ return nullptr;
+
+ PartDefRegs.insert(LastDefReg);
+ for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = LastDef->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ continue;
+ unsigned DefReg = MO.getReg();
+ if (TRI->isSubRegister(Reg, DefReg)) {
+ for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ PartDefRegs.insert(*SubRegs);
+ }
+ }
+ return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial defs into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ // If there was a previous use or a "full" def all is well.
+ if (!LastDef && !PhysRegUse[Reg]) {
+ // Otherwise, the last sub-register def implicitly defines this register.
+ // e.g.
+ // AH =
+ // AL = ... <imp-def EAX>, <imp-kill AH>
+ // = AH
+ // ...
+ // = EAX
+ // All of the sub-registers must have been defined before the use of Reg!
+ SmallSet<unsigned, 4> PartDefRegs;
+ MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+ // If LastPartialDef is NULL, it must be using a livein register.
+ if (LastPartialDef) {
+ LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[Reg] = LastPartialDef;
+ SmallSet<unsigned, 8> Processed;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (Processed.count(SubReg))
+ continue;
+ if (PartDefRegs.count(SubReg))
+ continue;
+ // This part of Reg was defined before the last partial def. It's killed
+ // here.
+ LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+ false/*IsDef*/,
+ true/*IsImp*/));
+ PhysRegDef[SubReg] = LastPartialDef;
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ Processed.insert(*SS);
+ }
+ }
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
+
+ // Remember this use.
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ PhysRegUse[*SubRegs] = &MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return nullptr;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ unsigned LastPartDefDist = 0;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist)
+ LastPartDefDist = Dist;
+ } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
+ MachineInstr *LastUse = PhysRegUse[Reg];
+ if (!LastDef && !LastUse)
+ return false;
+
+ MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+ unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+ // The whole register is used.
+ // AL =
+ // AH =
+ //
+ // = AX
+ // = AL, AX<imp-use, kill>
+ // AX =
+ //
+ // Or whole register is defined, but not used at all.
+ // AX<dead> =
+ // ...
+ // AX =
+ //
+ // Or whole register is defined, but only partly used.
+ // AX<dead> = AL<imp-def>
+ // = AL<kill>
+ // AX =
+ MachineInstr *LastPartDef = nullptr;
+ unsigned LastPartDefDist = 0;
+ SmallSet<unsigned, 8> PartUses;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ MachineInstr *Def = PhysRegDef[SubReg];
+ if (Def && Def != LastDef) {
+ // There was a def of this sub-register in between. This is a partial
+ // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def];
+ if (Dist > LastPartDefDist) {
+ LastPartDefDist = Dist;
+ LastPartDef = Def;
+ }
+ continue;
+ }
+ if (MachineInstr *Use = PhysRegUse[SubReg]) {
+ for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid();
+ ++SS)
+ PartUses.insert(*SS);
+ unsigned Dist = DistanceMap[Use];
+ if (Dist > LastRefOrPartRefDist) {
+ LastRefOrPartRefDist = Dist;
+ LastRefOrPartRef = Use;
+ }
+ }
+ }
+
+ if (!PhysRegUse[Reg]) {
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (!PartUses.count(SubReg))
+ continue;
+ bool NeedDef = true;
+ if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+ MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+ if (MO) {
+ NeedDef = false;
+ assert(!MO->isDead());
+ }
+ }
+ if (NeedDef)
+ PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+ true/*IsDef*/, true/*IsImp*/));
+ MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+ if (LastSubRef)
+ LastSubRef->addRegisterKilled(SubReg, TRI, true);
+ else {
+ LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+ for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
+ SS.isValid(); ++SS)
+ PhysRegUse[*SS] = LastRefOrPartRef;
+ }
+ for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
+ PartUses.erase(*SS);
+ }
+ } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+ if (LastPartDef)
+ // The last partial def kills the register.
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/, true/*IsKill*/));
+ else {
+ MachineOperand *MO =
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+ // If the last reference is the last def, then it's not used at all.
+ // That is, unless we are currently processing the last reference itself.
+ LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+ if (NeedEC) {
+ // If we are adding a subreg def and the superreg def is marked early
+ // clobber, add an early clobber marker to the subreg def.
+ MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+ if (MO)
+ MO->setIsEarlyClobber();
+ }
+ }
+ } else
+ LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+ return true;
+}
+
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+ // Clobbered registers are always dead, so there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
+ if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+ Super = *SR;
+ HandlePhysRegKill(Super, nullptr);
+ }
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ // What parts of the register are previously defined?
+ SmallSet<unsigned, 32> Live;
+ if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ Live.insert(*SubRegs);
+ } else {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ // If a register isn't itself defined, but all the parts that make it up
+ // are defined, then consider it defined as well.
+ // e.g.
+ // AL =
+ // AH =
+ // = AX
+ if (Live.count(SubReg))
+ continue;
+ if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+ for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
+ SS.isValid(); ++SS)
+ Live.insert(*SS);
+ }
+ }
+ }
+
+ // Start from the largest piece, find the last time any part of the register
+ // is referenced.
+ HandlePhysRegKill(Reg, MI);
+ // Only some of the sub-registers are used.
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (!Live.count(SubReg))
+ // Skip if this sub-register isn't defined.
+ continue;
+ HandlePhysRegKill(SubReg, MI);
+ }
+
+ if (MI)
+ Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.back();
+ Defs.pop_back();
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ PhysRegDef[SubReg] = &MI;
+ PhysRegUse[SubReg] = nullptr;
+ }
+ }
+}
+
+void LiveVariables::runOnInstr(MachineInstr &MI,
+ SmallVectorImpl<unsigned> &Defs) {
+ assert(!MI.isDebugValue());
+ // Process all of the operands of the instruction...
+ unsigned NumOperandsToProcess = MI.getNumOperands();
+
+ // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+ // of the uses. They will be handled in other basic blocks.
+ if (MI.isPHI())
+ NumOperandsToProcess = 1;
+
+ // Clear kill and dead markers. LV will recompute them.
+ SmallVector<unsigned, 4> UseRegs;
+ SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
+ for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ MRI->isReserved(MOReg)))
+ MO.setIsKill(false);
+ if (MO.readsReg())
+ UseRegs.push_back(MOReg);
+ } else {
+ assert(MO.isDef());
+ // FIXME: We should not remove any dead flags. However the MIPS RDDSP
+ // instruction needs it at the moment: http://llvm.org/PR27116.
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ !MRI->isReserved(MOReg))
+ MO.setIsDead(false);
+ DefRegs.push_back(MOReg);
+ }
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ // Process all uses.
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+ unsigned MOReg = UseRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegUse(MOReg, MBB, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegUse(MOReg, MI);
+ }
+
+ // Process all masked registers. (Call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI.getOperand(RegMasks[i]));
+
+ // Process all defs.
+ for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+ unsigned MOReg = DefRegs[i];
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ HandleVirtRegDef(MOReg, MI);
+ else if (!MRI->isReserved(MOReg))
+ HandlePhysRegDef(MOReg, &MI, Defs);
+ }
+ UpdatePhysRegDefs(MI, Defs);
+}
+
+void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
+ // Mark live-in registers as live-in.
+ SmallVector<unsigned, 4> Defs;
+ for (const auto &LI : MBB->liveins()) {
+ assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) &&
+ "Cannot have a live-in virtual register!");
+ HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
+ }
+
+ // Loop over all of the instructions, processing them.
+ DistanceMap.clear();
+ unsigned Dist = 0;
+ for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugValue())
+ continue;
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+
+ runOnInstr(MI, Defs);
+ }
+
+ // Handle any virtual assignments from PHI nodes which might be at the
+ // bottom of this basic block. We check all of our successor blocks to see
+ // if they have PHI nodes, and if so, we simulate an assignment at the end
+ // of the current block.
+ if (!PHIVarInfo[MBB->getNumber()].empty()) {
+ SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+ for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(),
+ E = VarInfoVec.end(); I != E; ++I)
+ // Mark it alive only in the block we are representing.
+ MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+ MBB);
+ }
+
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isEHPad())
+ continue;
+ for (const auto &LI : SuccMBB->liveins()) {
+ if (!TRI->isInAllocatableClass(LI.PhysReg))
+ // Ignore other live-ins, e.g. those that are live into landing pads.
+ LiveOuts.insert(LI.PhysReg);
+ }
+ }
+
+ // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+ // available at the end of the basic block.
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
+ HandlePhysRegDef(i, nullptr, Defs);
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MRI = &mf.getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
+ PHIVarInfo.resize(MF->getNumBlockIDs());
+ PHIJoins.clear();
+
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
+ analyzePHINodes(mf);
+
+ // Calculate live variable information in depth first order on the CFG of the
+ // function. This guarantees that we will see the definition of a virtual
+ // register before its uses due to dominance properties of SSA (except for PHI
+ // nodes, which are treated as a special case).
+ MachineBasicBlock *Entry = &MF->front();
+ SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+ for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
+ runOnBlock(MBB, NumRegs);
+
+ PhysRegDef.assign(NumRegs, nullptr);
+ PhysRegUse.assign(NumRegs, nullptr);
+ }
+
+ // Convert and transfer the dead/killed information we have gathered in
+ // VirtRegInfo onto the MachineInstrs.
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+ else
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
+
+ // Check to make sure there are no unreachable blocks in the MC CFG for the
+ // function. If there are, it is due to a bug in the instruction selector or
+ // some other part of the code generator.
+#ifndef NDEBUG
+ for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+ assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+ PhysRegDef.clear();
+ PhysRegUse.clear();
+ PHIVarInfo.clear();
+
+ return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr &OldMI,
+ MachineInstr &NewMI) {
+ VarInfo &VI = getVarInfo(Reg);
+ std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isKill()) {
+ MO.setIsKill(false);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ bool removed = getVarInfo(Reg).removeKill(MI);
+ assert(removed && "kill not in register's VarInfo?");
+ (void)removed;
+ }
+ }
+ }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in this function.
+/// In particular, we map each virtual register used by a PHI node to the
+/// basic block the value is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+ for (const auto &MBB : Fn)
+ for (const auto &BBI : MBB) {
+ if (!BBI.isPHI())
+ break;
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
+ if (BBI.getOperand(i).readsReg())
+ PHIVarInfo[BBI.getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI.getOperand(i).getReg());
+ }
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+ unsigned Reg,
+ MachineRegisterInfo &MRI) {
+ unsigned Num = MBB.getNumber();
+
+ // Reg is live-through.
+ if (AliveBlocks.test(Num))
+ return true;
+
+ // Registers defined in MBB cannot be live in.
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // Reg was not defined in MBB, was it killed here?
+ return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ SmallPtrSet<const MachineBasicBlock *, 8> Kills;
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ Kills.insert(VI.Kills[i]->getParent());
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ // Or is it live because there is a use in a successor that kills it?
+ if (Kills.count(SuccMBB))
+ return true;
+ }
+
+ return false;
+}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB,
+ MachineBasicBlock *SuccBB) {
+ const unsigned NumNew = BB->getNumber();
+
+ SmallSet<unsigned, 16> Defs, Kills;
+
+ MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
+ for (; BBI != BBE && BBI->isPHI(); ++BBI) {
+ // Record the def of the PHI node.
+ Defs.insert(BBI->getOperand(0).getReg());
+
+ // All registers used by PHI nodes in SuccBB must be live through BB.
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+ if (BBI->getOperand(i+1).getMBB() == BB)
+ getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+ }
+
+ // Record all vreg defs and kills of all instructions in SuccBB.
+ for (; BBI != BBE; ++BBI) {
+ for (MachineInstr::mop_iterator I = BBI->operands_begin(),
+ E = BBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+ if (I->isDef())
+ Defs.insert(I->getReg());
+ else if (I->isKill())
+ Kills.insert(I->getReg());
+ }
+ }
+ }
+
+ // Update info for all live variables
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // If the register is defined in the successor, it can't be live through BB.
+ if (Defs.count(Reg))
+ continue;
+
+ // If the register is either killed in or live through SuccBB it's also live
+ // through BB.
+ VarInfo &VI = getVarInfo(Reg);
+ if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber()))
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 000000000000..af7392f4435b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,420 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
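+// Sketch of the idea (pseudo machine IR; exact opcodes are target specific):
+// a frame index reference that the target reports as out of range, such as
+// STORE %v0, <fi#1>, 4096
+// is rewritten to go through a virtual base register materialized in the
+// entry block:
+// %base = FRAME-BASE <fi#1>
+// STORE %v0, %base, 0
+//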
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "localstackalloc"
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame index references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ int FrameIdx; // The frame index
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) :
+ MI(I), LocalOffset(Offset), FrameIdx(Idx) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() const { return MI; }
+ int64_t getLocalOffset() const { return LocalOffset; }
+ int getFrameIndex() const { return FrameIdx; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+ /// StackObjSet - A set of stack object indexes
+ typedef SmallSetVector<int, 8> StackObjSet;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo *MFI, bool StackGrowsDown,
+ int64_t &Offset, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
+ initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<StackProtector>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
+
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a bit better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo *MFI,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign) {
+
+ for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+ E = UnassignedObjs.end(); I != E; ++I) {
+ int i = *I;
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ ProtectedObjs.insert(i);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+ StackProtector *SP = &getAnalysis<StackProtector>();
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> ProtectedObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ StackObjSet LargeArrayObjs;
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+
+ switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ case StackProtector::SSPLK_None:
+ continue;
+ case StackProtector::SSPLK_SmallArray:
+ SmallArrayObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_AddrOf:
+ AddrOfObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_LargeArray:
+ LargeArrayObjs.insert(i);
+ continue;
+ }
+ llvm_unreachable("Unexpected SSPLayoutKind.");
+ }
+
+ AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign);
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (ProtectedObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(unsigned BaseReg,
+ int64_t BaseOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ // Check whether the offset from the base register to the target address is
+ // in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
+ return TRI->isFrameOffsetLegal(&MI, BaseReg, Offset);
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Collect all of the instructions in the function that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+
+ for (MachineBasicBlock &BB : Fn) {
+ for (MachineInstr &MI : BB) {
+ // Debug value, statepoint, stackmap and patchpoint instructions can't be
+ // out of range, so they don't need any updates.
+ if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STATEPOINT ||
+ MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT)
+ continue;
+
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI.getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex()))
+ break;
+ int Idx = MI.getOperand(i).getIndex();
+ int64_t LocalOffset = LocalOffsets[Idx];
+ if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
+ break;
+ FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx));
+ break;
+ }
+ }
+ }
+ }
+
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+ MachineBasicBlock *Entry = &Fn.front();
+
+ unsigned BaseReg = 0;
+ int64_t BaseOffset = 0;
+
+ // Loop through the frame references and allocate for them as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ FrameRef &FR = FrameReferenceInsns[ref];
+ MachineInstr &MI = *FR.getMachineInstr();
+ int64_t LocalOffset = FR.getLocalOffset();
+ int FrameIdx = FR.getFrameIndex();
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << MI);
+
+ unsigned idx = 0;
+ for (unsigned f = MI.getNumOperands(); idx != f; ++idx) {
+ if (!MI.getOperand(idx).isFI())
+ continue;
+
+ if (FrameIdx == MI.getOperand(idx).getIndex())
+ break;
+ }
+
+ assert(idx < MI.getNumOperands() && "Cannot find FI operand");
+
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ if (UsedBaseReg &&
+ lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust,
+ LocalOffset, MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
+ // We found a register to reuse.
+ Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
+ } else {
+ // No previously defined register was in range, so create a new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, idx);
+
+ int64_t PrevBaseOffset = BaseOffset;
+ BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+
+ // We'd like to avoid creating single-use virtual base registers.
+ // Because the FrameRefs are in sorted order, and we've already
+ // processed all FrameRefs before this one, just check whether or not
+ // the next FrameRef will be able to reuse this new register. If not,
+ // then don't bother creating it.
+ if (ref + 1 >= e ||
+ !lookupCandidateBaseReg(
+ BaseReg, BaseOffset, FrameSizeAdjust,
+ FrameReferenceInsns[ref + 1].getLocalOffset(),
+ *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
+ BaseOffset = PrevBaseOffset;
+ continue;
+ }
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " << LocalOffset + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(MI, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << MI);
+
+ ++NumReplacements;
+ }
+
+ return UsedBaseReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
new file mode 100644
index 000000000000..6966c8ca4a5f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -0,0 +1,162 @@
+//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is required for targets depending on libgcc style
+// emulated thread local storage variables. For every defined TLS variable xyz,
+// an __emutls_v.xyz is generated. If there is a non-zero initializer,
+// an __emutls_t.xyz is also generated.
+//
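+// For example (a conceptual sketch; "word" is the target's pointer-sized
+// integer type), a defined TLS variable such as
+// @x = thread_local global i32 7, align 4
+// roughly becomes
+// @__emutls_t.x = constant i32 7, align 4
+// @__emutls_v.x = global { word, word, i8*, i32* }
+// { word 4, word 4, i8* null, i32* @__emutls_t.x }
+//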
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loweremutls"
+
+namespace {
+
+class LowerEmuTLS : public ModulePass {
+ const TargetMachine *TM;
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { }
+ explicit LowerEmuTLS(const TargetMachine *TM)
+ : ModulePass(ID), TM(TM) {
+ initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+private:
+ bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
+ static void copyLinkageVisibility(Module &M,
+ const GlobalVariable *from,
+ GlobalVariable *to) {
+ to->setLinkage(from->getLinkage());
+ to->setVisibility(from->getVisibility());
+ if (from->hasComdat()) {
+ to->setComdat(M.getOrInsertComdat(to->getName()));
+ to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
+ }
+ }
+};
+} // end anonymous namespace
+
+char LowerEmuTLS::ID = 0;
+
+INITIALIZE_PASS(LowerEmuTLS, "loweremutls",
+ "Add __emutls_[vt]. variables for emultated TLS model",
+ false, false)
+
+ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) {
+ return new LowerEmuTLS(TM);
+}
+
+bool LowerEmuTLS::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ if (!TM || !TM->Options.EmulatedTLS)
+ return false;
+
+ bool Changed = false;
+ SmallVector<const GlobalVariable*, 8> TlsVars;
+ for (const auto &G : M.globals()) {
+ if (G.isThreadLocal())
+ TlsVars.append({&G});
+ }
+ for (const auto G : TlsVars)
+ Changed |= addEmuTlsVar(M, G);
+ return Changed;
+}
+
+bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
+ LLVMContext &C = M.getContext();
+ PointerType *VoidPtrType = Type::getInt8PtrTy(C);
+
+ std::string EmuTlsVarName = ("__emutls_v." + GV->getName()).str();
+ GlobalVariable *EmuTlsVar = M.getNamedGlobal(EmuTlsVarName);
+ if (EmuTlsVar)
+ return false; // It has been added before.
+
+ const DataLayout &DL = M.getDataLayout();
+ Constant *NullPtr = ConstantPointerNull::get(VoidPtrType);
+
+ // Get non-zero initializer from GV's initializer.
+ const Constant *InitValue = nullptr;
+ if (GV->hasInitializer()) {
+ InitValue = GV->getInitializer();
+ const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+ // When GV's init value is all 0, omit the EmuTlsTmplVar and let
+ // the emutls library function reset newly allocated TLS variables.
+ if (isa<ConstantAggregateZero>(InitValue) ||
+ (InitIntValue && InitIntValue->isZero()))
+ InitValue = nullptr;
+ }
+
+ // Create the __emutls_v. symbol, whose type has 4 fields:
+ // word size; // size of GV in bytes
+ // word align; // alignment of GV
+ // void *ptr; // initialized to 0; set at run time per thread.
+ // void *templ; // 0 or point to __emutls_t.*
+ // sizeof(word) should be the same as sizeof(void*) on target.
+ IntegerType *WordType = DL.getIntPtrType(C);
+ PointerType *InitPtrType = InitValue ?
+ PointerType::getUnqual(InitValue->getType()) : VoidPtrType;
+ Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
+ ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
+ StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+ EmuTlsVar = cast<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
+ copyLinkageVisibility(M, GV, EmuTlsVar);
+
+ // Define "__emutls_t.*" and "__emutls_v.*" only if GV is defined.
+ if (!GV->hasInitializer())
+ return true;
+
+ Type *GVType = GV->getValueType();
+ unsigned GVAlignment = GV->getAlignment();
+ if (!GVAlignment) {
+ // When LLVM IR declares a variable without alignment, use
+ // the ABI default alignment for the type.
+ GVAlignment = DL.getABITypeAlignment(GVType);
+ }
+
+ // Define "__emutls_t.*" if there is InitValue
+ GlobalVariable *EmuTlsTmplVar = nullptr;
+ if (InitValue) {
+ std::string EmuTlsTmplName = ("__emutls_t." + GV->getName()).str();
+ EmuTlsTmplVar = dyn_cast_or_null<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsTmplName, GVType));
+ assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
+ EmuTlsTmplVar->setConstant(true);
+ EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
+ EmuTlsTmplVar->setAlignment(GVAlignment);
+ copyLinkageVisibility(M, GV, EmuTlsTmplVar);
+ }
+
+ // Define "__emutls_v.*" with initializer and alignment.
+ Constant *ElementValues[4] = {
+ ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
+ ConstantInt::get(WordType, GVAlignment),
+ NullPtr, EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr
+ };
+ ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
+ EmuTlsVar->setInitializer(
+ ConstantStruct::get(EmuTlsVarType, ElementValueArray));
+ unsigned MaxAlignment = std::max(
+ DL.getABITypeAlignment(WordType),
+ DL.getABITypeAlignment(VoidPtrType));
+ EmuTlsVar->setAlignment(MaxAlignment);
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
new file mode 100644
index 000000000000..6e3de52f1a9c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -0,0 +1,612 @@
+//===- MILexer.cpp - Machine instructions lexer implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the lexing of machine instructions.
+//
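+// For example, a machine instruction string such as
+// %eax = MOV32rm %rip, 1, _, @G, _
+// lexes into NamedRegister, equal, Identifier, NamedRegister, IntegerLiteral,
+// underscore and NamedGlobalValue tokens, separated by comma tokens.
+//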
+//===----------------------------------------------------------------------===//
+
+#include "MILexer.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+
+typedef function_ref<void(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
+/// This class provides a way to iterate and get characters from the source
+/// string.
+class Cursor {
+ const char *Ptr;
+ const char *End;
+
+public:
+ Cursor(NoneType) : Ptr(nullptr), End(nullptr) {}
+
+ explicit Cursor(StringRef Str) {
+ Ptr = Str.data();
+ End = Ptr + Str.size();
+ }
+
+ bool isEOF() const { return Ptr == End; }
+
+ char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
+
+ void advance(unsigned I = 1) { Ptr += I; }
+
+ StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
+
+ StringRef upto(Cursor C) const {
+ assert(C.Ptr >= Ptr && C.Ptr <= End);
+ return StringRef(Ptr, C.Ptr - Ptr);
+ }
+
+ StringRef::iterator location() const { return Ptr; }
+
+ operator bool() const { return Ptr != nullptr; }
+};
+
+} // end anonymous namespace
+
+MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
+ this->Kind = Kind;
+ this->Range = Range;
+ return *this;
+}
+
+MIToken &MIToken::setStringValue(StringRef StrVal) {
+ StringValue = StrVal;
+ return *this;
+}
+
+MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
+ StringValueStorage = std::move(StrVal);
+ StringValue = StringValueStorage;
+ return *this;
+}
+
+MIToken &MIToken::setIntegerValue(APSInt IntVal) {
+ this->IntVal = std::move(IntVal);
+ return *this;
+}
+
+/// Skip the leading whitespace characters and return the updated cursor.
+static Cursor skipWhitespace(Cursor C) {
+ while (isblank(C.peek()))
+ C.advance();
+ return C;
+}
+
+static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; }
+
+/// Skip a line comment and return the updated cursor.
+static Cursor skipComment(Cursor C) {
+ if (C.peek() != ';')
+ return C;
+ while (!isNewlineChar(C.peek()) && !C.isEOF())
+ C.advance();
+ return C;
+}
+
+/// Return true if the given character satisfies the following regular
+/// expression: [-a-zA-Z$._0-9]
+static bool isIdentifierChar(char C) {
+ return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' ||
+ C == '$';
+}
+
+/// Unescapes the given string value.
+///
+/// Expects the string value to be quoted.
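+///
+/// For example, "a\\b" unescapes to a\b, and "\41" to A (a backslash followed
+/// by two hex digits encodes that byte value).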
+static std::string unescapeQuotedString(StringRef Value) {
+ assert(Value.front() == '"' && Value.back() == '"');
+ Cursor C = Cursor(Value.substr(1, Value.size() - 2));
+
+ std::string Str;
+ Str.reserve(C.remaining().size());
+ while (!C.isEOF()) {
+ char Char = C.peek();
+ if (Char == '\\') {
+ if (C.peek(1) == '\\') {
+ // Two '\' become one
+ Str += '\\';
+ C.advance(2);
+ continue;
+ }
+ if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
+ Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
+ C.advance(3);
+ continue;
+ }
+ }
+ Str += Char;
+ C.advance();
+ }
+ return Str;
+}
+
+/// Lex a string constant using the following regular expression: \"[^\"]*\"
+static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
+ assert(C.peek() == '"');
+ for (C.advance(); C.peek() != '"'; C.advance()) {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '\"'");
+ return None;
+ }
+ }
+ C.advance();
+ return C;
+}
+
+static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
+ unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
+ auto Range = C;
+ C.advance(PrefixLength);
+ if (C.peek() == '"') {
+ if (Cursor R = lexStringConstant(C, ErrorCallback)) {
+ StringRef String = Range.upto(R);
+ Token.reset(Type, String)
+ .setOwnedStringValue(
+ unescapeQuotedString(String.drop_front(PrefixLength)));
+ return R;
+ }
+ Token.reset(MIToken::Error, Range.remaining());
+ return Range;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ Token.reset(Type, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(PrefixLength));
+ return C;
+}
+
+static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) {
+ if (C.peek() != 'i' || !isdigit(C.peek(1)))
+ return None;
+ auto Range = C;
+ C.advance(); // Skip 'i'
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::IntegerType, Range.upto(C));
+ return C;
+}
+
+static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("_", MIToken::underscore)
+ .Case("implicit", MIToken::kw_implicit)
+ .Case("implicit-def", MIToken::kw_implicit_define)
+ .Case("def", MIToken::kw_def)
+ .Case("dead", MIToken::kw_dead)
+ .Case("killed", MIToken::kw_killed)
+ .Case("undef", MIToken::kw_undef)
+ .Case("internal", MIToken::kw_internal)
+ .Case("early-clobber", MIToken::kw_early_clobber)
+ .Case("debug-use", MIToken::kw_debug_use)
+ .Case("tied-def", MIToken::kw_tied_def)
+ .Case("frame-setup", MIToken::kw_frame_setup)
+ .Case("debug-location", MIToken::kw_debug_location)
+ .Case(".cfi_same_value", MIToken::kw_cfi_same_value)
+ .Case(".cfi_offset", MIToken::kw_cfi_offset)
+ .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("target-index", MIToken::kw_target_index)
+ .Case("half", MIToken::kw_half)
+ .Case("float", MIToken::kw_float)
+ .Case("double", MIToken::kw_double)
+ .Case("x86_fp80", MIToken::kw_x86_fp80)
+ .Case("fp128", MIToken::kw_fp128)
+ .Case("ppc_fp128", MIToken::kw_ppc_fp128)
+ .Case("target-flags", MIToken::kw_target_flags)
+ .Case("volatile", MIToken::kw_volatile)
+ .Case("non-temporal", MIToken::kw_non_temporal)
+ .Case("invariant", MIToken::kw_invariant)
+ .Case("align", MIToken::kw_align)
+ .Case("stack", MIToken::kw_stack)
+ .Case("got", MIToken::kw_got)
+ .Case("jump-table", MIToken::kw_jump_table)
+ .Case("constant-pool", MIToken::kw_constant_pool)
+ .Case("call-entry", MIToken::kw_call_entry)
+ .Case("liveout", MIToken::kw_liveout)
+ .Case("address-taken", MIToken::kw_address_taken)
+ .Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("liveins", MIToken::kw_liveins)
+ .Case("successors", MIToken::kw_successors)
+ .Default(MIToken::Identifier);
+}
+
+static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
+ if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
+ return None;
+ auto Range = C;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ auto Identifier = Range.upto(C);
+ Token.reset(getIdentifierKind(Identifier), Identifier)
+ .setStringValue(Identifier);
+ return C;
+}
+
+static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ bool IsReference = C.remaining().startswith("%bb.");
+ if (!IsReference && !C.remaining().startswith("bb."))
+ return None;
+ auto Range = C;
+ unsigned PrefixLength = IsReference ? 4 : 3;
+ C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
+ if (!isdigit(C.peek())) {
+ Token.reset(MIToken::Error, C.remaining());
+ ErrorCallback(C.location(), "expected a number after '%bb.'");
+ return C;
+ }
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ StringRef Number = NumberRange.upto(C);
+ unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
+ if (C.peek() == '.') {
+ C.advance(); // Skip '.'
+ ++StringOffset;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ }
+ Token.reset(IsReference ? MIToken::MachineBasicBlock
+ : MIToken::MachineBasicBlockLabel,
+ Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
+ return C;
+}
+
+static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return None;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
+ MIToken::TokenKind Kind) {
+ if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
+ return None;
+ auto Range = C;
+ C.advance(Rule.size());
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ StringRef Number = NumberRange.upto(C);
+ unsigned StringOffset = Rule.size() + Number.size();
+ if (C.peek() == '.') {
+ C.advance();
+ ++StringOffset;
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ }
+ Token.reset(Kind, Range.upto(C))
+ .setIntegerValue(APSInt(Number))
+ .setStringValue(Range.upto(C).drop_front(StringOffset));
+ return C;
+}
+
+static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
+}
+
+static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
+}
+
+static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
+}
+
+static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
+ return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
+}
+
+static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%subreg.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
+ ErrorCallback);
+}
+
+static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%ir-block.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
+ return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
+}
+
+static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ const StringRef Rule = "%ir.";
+ if (!C.remaining().startswith(Rule))
+ return None;
+ if (isdigit(C.peek(Rule.size())))
+ return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
+ return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
+}
+
+static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
+ auto Range = C;
+ C.advance(); // Skip '%'
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::VirtualRegister, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
+ if (C.peek() != '%')
+ return None;
+ if (isdigit(C.peek(1)))
+ return lexVirtualRegister(C, Token);
+ auto Range = C;
+ C.advance(); // Skip '%'
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ Token.reset(MIToken::NamedRegister, Range.upto(C))
+ .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
+ return C;
+}
+
+static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '@')
+ return None;
+ if (!isdigit(C.peek(1)))
+ return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
+ ErrorCallback);
+ auto Range = C;
+ C.advance(1); // Skip the '@'
+ auto NumberRange = C;
+ while (isdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::GlobalValue, Range.upto(C))
+ .setIntegerValue(APSInt(NumberRange.upto(C)));
+ return C;
+}
+
+static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '$')
+ return None;
+ return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
+ ErrorCallback);
+}
+
+static bool isValidHexFloatingPointPrefix(char C) {
+ return C == 'H' || C == 'K' || C == 'L' || C == 'M';
+}
+
+static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
+ if (C.peek() != '0' || C.peek(1) != 'x')
+ return None;
+ Cursor Range = C;
+ C.advance(2); // Skip '0x'
+ if (isValidHexFloatingPointPrefix(C.peek()))
+ C.advance();
+ while (isxdigit(C.peek()))
+ C.advance();
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
+static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
+ C.advance();
+ // Skip over [0-9]*([eE][-+]?[0-9]+)?
+ while (isdigit(C.peek()))
+ C.advance();
+ if ((C.peek() == 'e' || C.peek() == 'E') &&
+ (isdigit(C.peek(1)) ||
+ ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
+ C.advance(2);
+ while (isdigit(C.peek()))
+ C.advance();
+ }
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
+ if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
+ return None;
+ auto Range = C;
+ C.advance();
+ while (isdigit(C.peek()))
+ C.advance();
+ if (C.peek() == '.')
+ return lexFloatingPointLiteral(Range, C, Token);
+ StringRef StrVal = Range.upto(C);
+ Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
+ return C;
+}
+
+static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
+ return StringSwitch<MIToken::TokenKind>(Identifier)
+ .Case("!tbaa", MIToken::md_tbaa)
+ .Case("!alias.scope", MIToken::md_alias_scope)
+ .Case("!noalias", MIToken::md_noalias)
+ .Case("!range", MIToken::md_range)
+ .Default(MIToken::Error);
+}
+
+static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '!')
+ return None;
+ auto Range = C;
+ C.advance(1);
+ if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
+ Token.reset(MIToken::exclaim, Range.upto(C));
+ return C;
+ }
+ while (isIdentifierChar(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ Token.reset(getMetadataKeywordKind(StrVal), StrVal);
+ if (Token.isError())
+ ErrorCallback(Token.location(),
+ "use of unknown metadata keyword '" + StrVal + "'");
+ return C;
+}
+
+static MIToken::TokenKind symbolToken(char C) {
+ switch (C) {
+ case ',':
+ return MIToken::comma;
+ case '=':
+ return MIToken::equal;
+ case ':':
+ return MIToken::colon;
+ case '(':
+ return MIToken::lparen;
+ case ')':
+ return MIToken::rparen;
+ case '{':
+ return MIToken::lbrace;
+ case '}':
+ return MIToken::rbrace;
+ case '+':
+ return MIToken::plus;
+ case '-':
+ return MIToken::minus;
+ case '<':
+ return MIToken::less;
+ case '>':
+ return MIToken::greater;
+ default:
+ return MIToken::Error;
+ }
+}
+
+static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
+ MIToken::TokenKind Kind;
+ unsigned Length = 1;
+ if (C.peek() == ':' && C.peek(1) == ':') {
+ Kind = MIToken::coloncolon;
+ Length = 2;
+ } else
+ Kind = symbolToken(C.peek());
+ if (Kind == MIToken::Error)
+ return None;
+ auto Range = C;
+ C.advance(Length);
+ Token.reset(Kind, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
+ if (!isNewlineChar(C.peek()))
+ return None;
+ auto Range = C;
+ C.advance();
+ Token.reset(MIToken::Newline, Range.upto(C));
+ return C;
+}
+
+static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ if (C.peek() != '`')
+ return None;
+ auto Range = C;
+ C.advance();
+ auto StrRange = C;
+ while (C.peek() != '`') {
+ if (C.isEOF() || isNewlineChar(C.peek())) {
+ ErrorCallback(
+ C.location(),
+ "end of machine instruction reached before the closing '`'");
+ Token.reset(MIToken::Error, Range.remaining());
+ return C;
+ }
+ C.advance();
+ }
+ StringRef Value = StrRange.upto(C);
+ C.advance();
+ Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
+ return C;
+}
+
+StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
+ auto C = skipComment(skipWhitespace(Cursor(Source)));
+ if (C.isEOF()) {
+ Token.reset(MIToken::Eof, C.remaining());
+ return C.remaining();
+ }
+
+ if (Cursor R = maybeLexIntegerType(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIdentifier(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexJumpTableIndex(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexFixedStackObject(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexConstantPoolItem(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexRegister(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexNumericalLiteral(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback))
+ return R.remaining();
+ if (Cursor R = maybeLexSymbol(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexNewline(C, Token))
+ return R.remaining();
+ if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
+ return R.remaining();
+
+ Token.reset(MIToken::Error, C.remaining());
+ ErrorCallback(C.location(),
+ Twine("unexpected character '") + Twine(C.peek()) + "'");
+ return C.remaining();
+}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
new file mode 100644
index 000000000000..32fc8ab271e6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -0,0 +1,196 @@
+//===- MILexer.h - Lexer for machine instructions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the function that lexes the machine instruction source
+// string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include <functional>
+
+namespace llvm {
+
+class Twine;
+
+/// A token produced by the machine instruction lexer.
+struct MIToken {
+ enum TokenKind {
+ // Markers
+ Eof,
+ Error,
+ Newline,
+
+ // Tokens with no info.
+ comma,
+ equal,
+ underscore,
+ colon,
+ coloncolon,
+ exclaim,
+ lparen,
+ rparen,
+ lbrace,
+ rbrace,
+ plus,
+ minus,
+ less,
+ greater,
+
+ // Keywords
+ kw_implicit,
+ kw_implicit_define,
+ kw_def,
+ kw_dead,
+ kw_killed,
+ kw_undef,
+ kw_internal,
+ kw_early_clobber,
+ kw_debug_use,
+ kw_tied_def,
+ kw_frame_setup,
+ kw_debug_location,
+ kw_cfi_same_value,
+ kw_cfi_offset,
+ kw_cfi_def_cfa_register,
+ kw_cfi_def_cfa_offset,
+ kw_cfi_def_cfa,
+ kw_blockaddress,
+ kw_target_index,
+ kw_half,
+ kw_float,
+ kw_double,
+ kw_x86_fp80,
+ kw_fp128,
+ kw_ppc_fp128,
+ kw_target_flags,
+ kw_volatile,
+ kw_non_temporal,
+ kw_invariant,
+ kw_align,
+ kw_stack,
+ kw_got,
+ kw_jump_table,
+ kw_constant_pool,
+ kw_call_entry,
+ kw_liveout,
+ kw_address_taken,
+ kw_landing_pad,
+ kw_liveins,
+ kw_successors,
+
+ // Named metadata keywords
+ md_tbaa,
+ md_alias_scope,
+ md_noalias,
+ md_range,
+
+ // Identifier tokens
+ Identifier,
+ IntegerType,
+ NamedRegister,
+ MachineBasicBlockLabel,
+ MachineBasicBlock,
+ StackObject,
+ FixedStackObject,
+ NamedGlobalValue,
+ GlobalValue,
+ ExternalSymbol,
+
+ // Other tokens
+ IntegerLiteral,
+ FloatingPointLiteral,
+ VirtualRegister,
+ ConstantPoolItem,
+ JumpTableIndex,
+ NamedIRBlock,
+ IRBlock,
+ NamedIRValue,
+ IRValue,
+ QuotedIRValue, // `<constant value>`
+ SubRegisterIndex
+ };
+
+private:
+ TokenKind Kind;
+ StringRef Range;
+ StringRef StringValue;
+ std::string StringValueStorage;
+ APSInt IntVal;
+
+public:
+ MIToken() : Kind(Error) {}
+
+ MIToken &reset(TokenKind Kind, StringRef Range);
+
+ MIToken &setStringValue(StringRef StrVal);
+ MIToken &setOwnedStringValue(std::string StrVal);
+ MIToken &setIntegerValue(APSInt IntVal);
+
+ TokenKind kind() const { return Kind; }
+
+ bool isError() const { return Kind == Error; }
+
+ bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
+
+ bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
+
+ bool isRegister() const {
+ return Kind == NamedRegister || Kind == underscore ||
+ Kind == VirtualRegister;
+ }
+
+ bool isRegisterFlag() const {
+ return Kind == kw_implicit || Kind == kw_implicit_define ||
+ Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
+ Kind == kw_undef || Kind == kw_internal ||
+ Kind == kw_early_clobber || Kind == kw_debug_use;
+ }
+
+ bool isMemoryOperandFlag() const {
+ return Kind == kw_volatile || Kind == kw_non_temporal ||
+ Kind == kw_invariant;
+ }
+
+ bool is(TokenKind K) const { return Kind == K; }
+
+ bool isNot(TokenKind K) const { return Kind != K; }
+
+ StringRef::iterator location() const { return Range.begin(); }
+
+ StringRef range() const { return Range; }
+
+ /// Return the token's string value.
+ StringRef stringValue() const { return StringValue; }
+
+ const APSInt &integerValue() const { return IntVal; }
+
+ bool hasIntegerValue() const {
+ return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
+ Kind == MachineBasicBlockLabel || Kind == StackObject ||
+ Kind == FixedStackObject || Kind == GlobalValue ||
+ Kind == VirtualRegister || Kind == ConstantPoolItem ||
+ Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
+ }
+};
+
+/// Consume a single machine instruction token in the given source and return
+/// the remaining source string.
+StringRef lexMIToken(
+ StringRef Source, MIToken &Token,
+ function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
new file mode 100644
index 000000000000..b3fd16f15889
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -0,0 +1,2094 @@
+//===- MIParser.cpp - Machine instructions parser implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the parsing of machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIParser.h"
+#include "MILexer.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
+ SourceMgr &SM, const SlotMapping &IRSlots)
+ : MF(MF), SM(&SM), IRSlots(IRSlots) {
+}
+
+namespace {
+
+/// A wrapper struct around the 'MachineOperand' struct that includes a source
+/// range and other attributes.
+struct ParsedMachineOperand {
+ MachineOperand Operand;
+ StringRef::iterator Begin;
+ StringRef::iterator End;
+ Optional<unsigned> TiedDefIdx;
+
+ ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin,
+ StringRef::iterator End, Optional<unsigned> &TiedDefIdx)
+ : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) {
+ if (TiedDefIdx)
+ assert(Operand.isReg() && Operand.isUse() &&
+ "Only used register operands can be tied");
+ }
+};
+
+class MIParser {
+ MachineFunction &MF;
+ SMDiagnostic &Error;
+ StringRef Source, CurrentSource;
+ MIToken Token;
+ const PerFunctionMIParsingState &PFS;
+ /// Maps from instruction names to op codes.
+ StringMap<unsigned> Names2InstrOpCodes;
+ /// Maps from register names to registers.
+ StringMap<unsigned> Names2Regs;
+ /// Maps from register mask names to register masks.
+ StringMap<const uint32_t *> Names2RegMasks;
+ /// Maps from subregister names to subregister indices.
+ StringMap<unsigned> Names2SubRegIndices;
+ /// Maps from slot numbers to function's unnamed basic blocks.
+ DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
+ /// Maps from slot numbers to function's unnamed values.
+ DenseMap<unsigned, const Value *> Slots2Values;
+ /// Maps from target index names to target indices.
+ StringMap<int> Names2TargetIndices;
+ /// Maps from direct target flag names to the direct target flag values.
+ StringMap<unsigned> Names2DirectTargetFlags;
+ /// Maps from bitmask target flag names to the bitmask target flag values.
+ StringMap<unsigned> Names2BitmaskTargetFlags;
+
+public:
+ MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source);
+
+ /// \p SkipChar gives the number of characters to skip before looking
+ /// for the next token.
+ void lex(unsigned SkipChar = 0);
+
+ /// Report an error at the current location with the given message.
+ ///
+ /// This function always returns true.
+ bool error(const Twine &Msg);
+
+ /// Report an error at the given location with the given message.
+ ///
+ /// This function always returns true.
+ bool error(StringRef::iterator Loc, const Twine &Msg);
+
+ bool
+ parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlocks();
+ bool parse(MachineInstr *&MI);
+ bool parseStandaloneMBB(MachineBasicBlock *&MBB);
+ bool parseStandaloneNamedRegister(unsigned &Reg);
+ bool parseStandaloneVirtualRegister(unsigned &Reg);
+ bool parseStandaloneStackObject(int &FI);
+ bool parseStandaloneMDNode(MDNode *&Node);
+
+ bool
+ parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots);
+ bool parseBasicBlock(MachineBasicBlock &MBB);
+ bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
+ bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
+
+ bool parseRegister(unsigned &Reg);
+ bool parseRegisterFlag(unsigned &Flags);
+ bool parseSubRegisterIndex(unsigned &SubReg);
+ bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
+ bool parseSize(unsigned &Size);
+ bool parseRegisterOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx, bool IsDef = false);
+ bool parseImmediateOperand(MachineOperand &Dest);
+ bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
+ const Constant *&C);
+ bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
+ bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read,
+ Type *&Ty);
+ // \p MustBeSized defines whether or not \p Ty must be sized.
+ bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true);
+ bool parseTypedImmediateOperand(MachineOperand &Dest);
+ bool parseFPImmediateOperand(MachineOperand &Dest);
+ bool parseMBBReference(MachineBasicBlock *&MBB);
+ bool parseMBBOperand(MachineOperand &Dest);
+ bool parseStackFrameIndex(int &FI);
+ bool parseStackObjectOperand(MachineOperand &Dest);
+ bool parseFixedStackFrameIndex(int &FI);
+ bool parseFixedStackObjectOperand(MachineOperand &Dest);
+ bool parseGlobalValue(GlobalValue *&GV);
+ bool parseGlobalAddressOperand(MachineOperand &Dest);
+ bool parseConstantPoolIndexOperand(MachineOperand &Dest);
+ bool parseSubRegisterIndexOperand(MachineOperand &Dest);
+ bool parseJumpTableIndexOperand(MachineOperand &Dest);
+ bool parseExternalSymbolOperand(MachineOperand &Dest);
+ bool parseMDNode(MDNode *&Node);
+ bool parseMetadataOperand(MachineOperand &Dest);
+ bool parseCFIOffset(int &Offset);
+ bool parseCFIRegister(unsigned &Reg);
+ bool parseCFIOperand(MachineOperand &Dest);
+ bool parseIRBlock(BasicBlock *&BB, const Function &F);
+ bool parseBlockAddressOperand(MachineOperand &Dest);
+ bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
+ bool parseMachineOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx);
+ bool parseMachineOperandAndTargetFlags(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx);
+ bool parseOffset(int64_t &Offset);
+ bool parseAlignment(unsigned &Alignment);
+ bool parseOperandsOffset(MachineOperand &Op);
+ bool parseIRValue(const Value *&V);
+ bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
+ bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
+ bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
+
+private:
+ /// Convert the integer literal in the current token into an unsigned integer.
+ ///
+ /// Return true if an error occurred.
+ bool getUnsigned(unsigned &Result);
+
+ /// Convert the integer literal in the current token into a uint64.
+ ///
+ /// Return true if an error occurred.
+ bool getUint64(uint64_t &Result);
+
+ /// If the current token is of the given kind, consume it and return false.
+ /// Otherwise report an error and return true.
+ bool expectAndConsume(MIToken::TokenKind TokenKind);
+
+ /// If the current token is of the given kind, consume it and return true.
+ /// Otherwise return false.
+ bool consumeIfPresent(MIToken::TokenKind TokenKind);
+
+ void initNames2InstrOpCodes();
+
+ /// Try to convert an instruction name to an opcode. Return true if the
+ /// instruction name is invalid.
+ bool parseInstrName(StringRef InstrName, unsigned &OpCode);
+
+ bool parseInstruction(unsigned &OpCode, unsigned &Flags);
+
+ bool assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands);
+
+ bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+ const MCInstrDesc &MCID);
+
+ void initNames2Regs();
+
+ /// Try to convert a register name to a register number. Return true if the
+ /// register name is invalid.
+ bool getRegisterByName(StringRef RegName, unsigned &Reg);
+
+ void initNames2RegMasks();
+
+ /// Check if the given identifier is a name of a register mask.
+ ///
+ /// Return null if the identifier isn't a register mask.
+ const uint32_t *getRegMask(StringRef Identifier);
+
+ void initNames2SubRegIndices();
+
+ /// Check if the given identifier is a name of a subregister index.
+ ///
+ /// Return 0 if the name isn't a subregister index.
+ unsigned getSubRegIndex(StringRef Name);
+
+ const BasicBlock *getIRBlock(unsigned Slot);
+ const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
+
+ const Value *getIRValue(unsigned Slot);
+
+ void initNames2TargetIndices();
+
+ /// Try to convert a target index name to the corresponding target index.
+ ///
+ /// Return true if the name isn't a name of a target index.
+ bool getTargetIndex(StringRef Name, int &Index);
+
+ void initNames2DirectTargetFlags();
+
+ /// Try to convert a name of a direct target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a direct flag.
+ bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
+
+ void initNames2BitmaskTargetFlags();
+
+ /// Try to convert a name of a bitmask target flag to the corresponding
+ /// target flag.
+ ///
+ /// Return true if the name isn't a name of a bitmask target flag.
+ bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
+};
+
+} // end anonymous namespace
+
+MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ StringRef Source)
+ : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS)
+{}
+
+void MIParser::lex(unsigned SkipChar) {
+ CurrentSource = lexMIToken(
+ CurrentSource.data() + SkipChar, Token,
+ [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); });
+}
+
+bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); }
+
+bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
+ const SourceMgr &SM = *PFS.SM;
+ assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size()));
+ const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID());
+ if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) {
+ // Create an ordinary diagnostic when the source manager's buffer is the
+ // source string.
+ Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
+ return true;
+ }
+ // Create a diagnostic for a YAML string literal.
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
+ Loc - Source.data(), SourceMgr::DK_Error, Msg.str(),
+ Source, None, None);
+ return true;
+}
+
+static const char *toString(MIToken::TokenKind TokenKind) {
+ switch (TokenKind) {
+ case MIToken::comma:
+ return "','";
+ case MIToken::equal:
+ return "'='";
+ case MIToken::colon:
+ return "':'";
+ case MIToken::lparen:
+ return "'('";
+ case MIToken::rparen:
+ return "')'";
+ default:
+ return "<unknown token>";
+ }
+}
+
+bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return error(Twine("expected ") + toString(TokenKind));
+ lex();
+ return false;
+}
+
+bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) {
+ if (Token.isNot(TokenKind))
+ return false;
+ lex();
+ return true;
+}
+
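+/// A sketch of the block definition syntax handled below, inferred from the
+/// parser logic (the block id, the IR block name 'entry', and the attribute
+/// values are illustrative):
+///   bb.0.entry (address-taken, landing-pad, align 4):
+/// Both the IR block name in the label and the parenthesized attribute list
+/// are optional.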
+bool MIParser::parseBasicBlockDefinition(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ unsigned ID = 0;
+ if (getUnsigned(ID))
+ return true;
+ auto Loc = Token.location();
+ auto Name = Token.stringValue();
+ lex();
+ bool HasAddressTaken = false;
+ bool IsLandingPad = false;
+ unsigned Alignment = 0;
+ BasicBlock *BB = nullptr;
+ if (consumeIfPresent(MIToken::lparen)) {
+ do {
+ // TODO: Report an error when the same attribute is specified multiple times.
+ switch (Token.kind()) {
+ case MIToken::kw_address_taken:
+ HasAddressTaken = true;
+ lex();
+ break;
+ case MIToken::kw_landing_pad:
+ IsLandingPad = true;
+ lex();
+ break;
+ case MIToken::kw_align:
+ if (parseAlignment(Alignment))
+ return true;
+ break;
+ case MIToken::IRBlock:
+ // TODO: Report an error when both a name and an IR block are specified.
+ if (parseIRBlock(BB, *MF.getFunction()))
+ return true;
+ lex();
+ break;
+ default:
+ break;
+ }
+ } while (consumeIfPresent(MIToken::comma));
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ if (expectAndConsume(MIToken::colon))
+ return true;
+
+ if (!Name.empty()) {
+ BB = dyn_cast_or_null<BasicBlock>(
+ MF.getFunction()->getValueSymbolTable().lookup(Name));
+ if (!BB)
+ return error(Loc, Twine("basic block '") + Name +
+ "' is not defined in the function '" +
+ MF.getName() + "'");
+ }
+ auto *MBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(MF.end(), MBB);
+ bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second;
+ if (!WasInserted)
+ return error(Loc, Twine("redefinition of machine basic block with id #") +
+ Twine(ID));
+ if (Alignment)
+ MBB->setAlignment(Alignment);
+ if (HasAddressTaken)
+ MBB->setHasAddressTaken();
+ MBB->setIsEHPad(IsLandingPad);
+ return false;
+}
+
+bool MIParser::parseBasicBlockDefinitions(
+ DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ if (Token.isNot(MIToken::MachineBasicBlockLabel))
+ return error("expected a basic block definition before instructions");
+ unsigned BraceDepth = 0;
+ do {
+ if (parseBasicBlockDefinition(MBBSlots))
+ return true;
+ bool IsAfterNewline = false;
+ // Skip until the next machine basic block.
+ while (true) {
+ if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) ||
+ Token.isErrorOrEOF())
+ break;
+ else if (Token.is(MIToken::MachineBasicBlockLabel))
+ return error("basic block definition should be located at the start of "
+ "the line");
+ else if (consumeIfPresent(MIToken::Newline)) {
+ IsAfterNewline = true;
+ continue;
+ }
+ IsAfterNewline = false;
+ if (Token.is(MIToken::lbrace))
+ ++BraceDepth;
+ if (Token.is(MIToken::rbrace)) {
+ if (!BraceDepth)
+ return error("extraneous closing brace ('}')");
+ --BraceDepth;
+ }
+ lex();
+ }
+ // Verify that every '{' was closed by the end of the file or the next block.
+ if (!Token.isError() && BraceDepth)
+ return error("expected '}'"); // FIXME: Report a note that shows '{'.
+ } while (!Token.isErrorOrEOF());
+ return Token.isError();
+}
+
+bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_liveins));
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of liveins.
+ return false;
+ do {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg = 0;
+ if (parseRegister(Reg))
+ return true;
+ MBB.addLiveIn(Reg);
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+ return false;
+}
+
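+/// Illustrative syntax for the successor list parsed below, inferred from the
+/// parser logic (block ids and weights are made-up values):
+///   successors: %bb.1(16), %bb.2(16)
+/// Each entry is a machine basic block reference with an optional
+/// parenthesized weight; the weights are normalized into branch probabilities
+/// once the whole list has been parsed.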
+bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
+ assert(Token.is(MIToken::kw_successors));
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNewlineOrEOF()) // Allow an empty list of successors.
+ return false;
+ do {
+ if (Token.isNot(MIToken::MachineBasicBlock))
+ return error("expected a machine basic block reference");
+ MachineBasicBlock *SuccMBB = nullptr;
+ if (parseMBBReference(SuccMBB))
+ return true;
+ lex();
+ unsigned Weight = 0;
+ if (consumeIfPresent(MIToken::lparen)) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '('");
+ if (getUnsigned(Weight))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight));
+ } while (consumeIfPresent(MIToken::comma));
+ MBB.normalizeSuccProbs();
+ return false;
+}
+
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
+ // Skip the definition.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ lex();
+ if (consumeIfPresent(MIToken::lparen)) {
+ while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF())
+ lex();
+ consumeIfPresent(MIToken::rparen);
+ }
+ consumeIfPresent(MIToken::colon);
+
+ // Parse the liveins and successors.
+ // N.B.: Multiple lists of successors and liveins are allowed; they're
+ // merged into one.
+ // Example:
+ // liveins: %edi
+ // liveins: %esi
+ //
+ // is equivalent to
+ // liveins: %edi, %esi
+ while (true) {
+ if (Token.is(MIToken::kw_successors)) {
+ if (parseBasicBlockSuccessors(MBB))
+ return true;
+ } else if (Token.is(MIToken::kw_liveins)) {
+ if (parseBasicBlockLiveins(MBB))
+ return true;
+ } else if (consumeIfPresent(MIToken::Newline)) {
+ continue;
+ } else
+ break;
+ if (!Token.isNewlineOrEOF())
+ return error("expected line break at the end of a list");
+ lex();
+ }
+
+ // Parse the instructions.
+ bool IsInBundle = false;
+ MachineInstr *PrevMI = nullptr;
+ while (true) {
+ if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof))
+ return false;
+ else if (consumeIfPresent(MIToken::Newline))
+ continue;
+ if (consumeIfPresent(MIToken::rbrace)) {
+ // The first parsing pass should verify that all closing '}' have an
+ // opening '{'.
+ assert(IsInBundle);
+ IsInBundle = false;
+ continue;
+ }
+ MachineInstr *MI = nullptr;
+ if (parse(MI))
+ return true;
+ MBB.insert(MBB.end(), MI);
+ if (IsInBundle) {
+ PrevMI->setFlag(MachineInstr::BundledSucc);
+ MI->setFlag(MachineInstr::BundledPred);
+ }
+ PrevMI = MI;
+ if (Token.is(MIToken::lbrace)) {
+ if (IsInBundle)
+ return error("nested instruction bundles are not allowed");
+ lex();
+ // This instruction is the start of the bundle.
+ MI->setFlag(MachineInstr::BundledSucc);
+ IsInBundle = true;
+ if (!Token.is(MIToken::Newline))
+ // The next instruction can be on the same line.
+ continue;
+ }
+ assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
+ lex();
+ }
+ return false;
+}
+
+bool MIParser::parseBasicBlocks() {
+ lex();
+ // Skip until the first machine basic block.
+ while (Token.is(MIToken::Newline))
+ lex();
+ if (Token.isErrorOrEOF())
+ return Token.isError();
+ // The first parsing pass should have verified that this token is an MBB label
+ // in the 'parseBasicBlockDefinitions' method.
+ assert(Token.is(MIToken::MachineBasicBlockLabel));
+ do {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(MBB))
+ return true;
+ if (parseBasicBlock(*MBB))
+ return true;
+ // The method 'parseBasicBlock' should parse the whole block until the next
+ // block or the end of the file.
+ assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof));
+ } while (Token.isNot(MIToken::Eof));
+ return false;
+}
+
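+/// A sketch of the overall instruction syntax handled below, inferred from the
+/// parser logic (the opcode, registers, and metadata ids are illustrative):
+///   %eax = MOV32rm %rdi, 1, _, 0, _, debug-location !12 :: (load 4 from %ir.p)
+/// i.e. optional register definitions before '=', the opcode, the remaining
+/// operands, an optional 'debug-location' metadata reference, and optional
+/// machine memory operands after '::'.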
+bool MIParser::parse(MachineInstr *&MI) {
+ // Parse any register operands before '='
+ MachineOperand MO = MachineOperand::CreateImm(0);
+ SmallVector<ParsedMachineOperand, 8> Operands;
+ while (Token.isRegister() || Token.isRegisterFlag()) {
+ auto Loc = Token.location();
+ Optional<unsigned> TiedDefIdx;
+ if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
+ return true;
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+ if (!Operands.empty() && expectAndConsume(MIToken::equal))
+ return true;
+
+ unsigned OpCode, Flags = 0;
+ if (Token.isError() || parseInstruction(OpCode, Flags))
+ return true;
+
+ Type *Ty = nullptr;
+ if (isPreISelGenericOpcode(OpCode)) {
+ // For a generic opcode, a type is mandatory.
+ auto Loc = Token.location();
+ if (parseIRType(Loc, Ty))
+ return true;
+ }
+
+ // Parse the remaining machine operands.
+ while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+ Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
+ auto Loc = Token.location();
+ Optional<unsigned> TiedDefIdx;
+ if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
+ return true;
+ Operands.push_back(
+ ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ break;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ }
+
+ DebugLoc DebugLocation;
+ if (Token.is(MIToken::kw_debug_location)) {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node after 'debug-location'");
+ MDNode *Node = nullptr;
+ if (parseMDNode(Node))
+ return true;
+ DebugLocation = DebugLoc(Node);
+ }
+
+ // Parse the machine memory operands.
+ SmallVector<MachineMemOperand *, 2> MemOperands;
+ if (Token.is(MIToken::coloncolon)) {
+ lex();
+ while (!Token.isNewlineOrEOF()) {
+ MachineMemOperand *MemOp = nullptr;
+ if (parseMachineMemoryOperand(MemOp))
+ return true;
+ MemOperands.push_back(MemOp);
+ if (Token.isNewlineOrEOF())
+ break;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine memory operand");
+ lex();
+ }
+ }
+
+ const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
+ if (!MCID.isVariadic()) {
+ // FIXME: Move the implicit operand verification to the machine verifier.
+ if (verifyImplicitOperands(Operands, MCID))
+ return true;
+ }
+
+ // TODO: Check for extraneous machine operands.
+ MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
+ MI->setFlags(Flags);
+ if (Ty)
+ MI->setType(Ty);
+ for (const auto &Operand : Operands)
+ MI->addOperand(MF, Operand.Operand);
+ if (assignRegisterTies(*MI, Operands))
+ return true;
+ if (MemOperands.empty())
+ return false;
+ MachineInstr::mmo_iterator MemRefs =
+ MF.allocateMemRefsArray(MemOperands.size());
+ std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
+ MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
+ return false;
+}
+
+bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
+ lex();
+ if (Token.isNot(MIToken::MachineBasicBlock))
+ return error("expected a machine basic block reference");
+ if (parseMBBReference(MBB))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error(
+ "expected end of string after the machine basic block reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ if (parseRegister(Reg))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::VirtualRegister))
+ return error("expected a virtual register");
+ if (parseRegister(Reg))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneStackObject(int &FI) {
+ lex();
+ if (Token.isNot(MIToken::StackObject))
+ return error("expected a stack object");
+ if (parseStackFrameIndex(FI))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the stack object reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
+ lex();
+ if (Token.isNot(MIToken::exclaim))
+ return error("expected a metadata node");
+ if (parseMDNode(Node))
+ return true;
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the metadata node");
+ return false;
+}
+
+static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
+ assert(MO.isImplicit());
+ return MO.isDef() ? "implicit-def" : "implicit";
+}
+
+static std::string getRegisterName(const TargetRegisterInfo *TRI,
+ unsigned Reg) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg");
+ return StringRef(TRI->getName(Reg)).lower();
+}
+
+/// Return true if the parsed machine operands contain a given machine operand.
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ for (const auto &I : Operands) {
+ if (ImplicitOperand.isIdenticalTo(I.Operand))
+ return true;
+ }
+ return false;
+}
+
+bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+ const MCInstrDesc &MCID) {
+ if (MCID.isCall())
+ // We can't verify call instructions as they can contain arbitrary implicit
+ // register and register mask operands.
+ return false;
+
+ // Gather all the expected implicit operands.
+ SmallVector<MachineOperand, 4> ImplicitOperands;
+ if (MCID.ImplicitDefs)
+ for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
+ ImplicitOperands.push_back(
+ MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID.ImplicitUses)
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
+ ImplicitOperands.push_back(
+ MachineOperand::CreateReg(*ImpUses, false, true));
+
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ for (const auto &I : ImplicitOperands) {
+ if (isImplicitOperandIn(I, Operands))
+ continue;
+ return error(Operands.empty() ? Token.location() : Operands.back().End,
+ Twine("missing implicit register operand '") +
+ printImplicitRegisterFlag(I) + " %" +
+ getRegisterName(TRI, I.getReg()) + "'");
+ }
+ return false;
+}
+
+bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
+ if (Token.is(MIToken::kw_frame_setup)) {
+ Flags |= MachineInstr::FrameSetup;
+ lex();
+ }
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected a machine instruction");
+ StringRef InstrName = Token.stringValue();
+ if (parseInstrName(InstrName, OpCode))
+ return error(Twine("unknown machine instruction name '") + InstrName + "'");
+ lex();
+ return false;
+}
+
+bool MIParser::parseRegister(unsigned &Reg) {
+ switch (Token.kind()) {
+ case MIToken::underscore:
+ Reg = 0;
+ break;
+ case MIToken::NamedRegister: {
+ StringRef Name = Token.stringValue();
+ if (getRegisterByName(Name, Reg))
+ return error(Twine("unknown register name '") + Name + "'");
+ break;
+ }
+ case MIToken::VirtualRegister: {
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ const auto RegInfo = PFS.VirtualRegisterSlots.find(ID);
+ if (RegInfo == PFS.VirtualRegisterSlots.end())
+ return error(Twine("use of undefined virtual register '%") + Twine(ID) +
+ "'");
+ Reg = RegInfo->second;
+ break;
+ }
+ // TODO: Parse other register kinds.
+ default:
+ llvm_unreachable("The current token should be a register");
+ }
+ return false;
+}
+
+bool MIParser::parseRegisterFlag(unsigned &Flags) {
+ const unsigned OldFlags = Flags;
+ switch (Token.kind()) {
+ case MIToken::kw_implicit:
+ Flags |= RegState::Implicit;
+ break;
+ case MIToken::kw_implicit_define:
+ Flags |= RegState::ImplicitDefine;
+ break;
+ case MIToken::kw_def:
+ Flags |= RegState::Define;
+ break;
+ case MIToken::kw_dead:
+ Flags |= RegState::Dead;
+ break;
+ case MIToken::kw_killed:
+ Flags |= RegState::Kill;
+ break;
+ case MIToken::kw_undef:
+ Flags |= RegState::Undef;
+ break;
+ case MIToken::kw_internal:
+ Flags |= RegState::InternalRead;
+ break;
+ case MIToken::kw_early_clobber:
+ Flags |= RegState::EarlyClobber;
+ break;
+ case MIToken::kw_debug_use:
+ Flags |= RegState::Debug;
+ break;
+ default:
+ llvm_unreachable("The current token should be a register flag");
+ }
+ if (OldFlags == Flags)
+ // If the flags weren't modified, the same flag must have been specified
+ // more than once.
+ return error("duplicate '" + Token.stringValue() + "' register flag");
+ lex();
+ return false;
+}
+
+bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
+ assert(Token.is(MIToken::colon));
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected a subregister index after ':'");
+ auto Name = Token.stringValue();
+ SubReg = getSubRegIndex(Name);
+ if (!SubReg)
+ return error(Twine("use of unknown subregister index '") + Name + "'");
+ lex();
+ return false;
+}
+
+bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
+ if (!consumeIfPresent(MIToken::kw_tied_def))
+ return error("expected 'tied-def' after '('");
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'tied-def'");
+ if (getUnsigned(TiedDefIdx))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ return false;
+}
+
+bool MIParser::parseSize(unsigned &Size) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal for the size");
+ if (getUnsigned(Size))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ return false;
+}
+
+bool MIParser::assignRegisterTies(MachineInstr &MI,
+ ArrayRef<ParsedMachineOperand> Operands) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
+ for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+ if (!Operands[I].TiedDefIdx)
+ continue;
+ // The parser ensures that this operand is a register use, so we just have
+ // to check the tied-def operand.
+ unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+ if (DefIdx >= E)
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '" +
+ Twine(DefIdx) + "'; instruction has only ") +
+ Twine(E) + " operands");
+ const auto &DefOperand = Operands[DefIdx].Operand;
+ if (!DefOperand.isReg() || !DefOperand.isDef())
+ // FIXME: add note with the def operand.
+ return error(Operands[I].Begin,
+ Twine("use of invalid tied-def operand index '") +
+ Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) +
+ " isn't a defined register");
+ // Check that the tied-def operand wasn't tied elsewhere.
+ for (const auto &TiedPair : TiedRegisterPairs) {
+ if (TiedPair.first == DefIdx)
+ return error(Operands[I].Begin,
+ Twine("the tied-def operand #") + Twine(DefIdx) +
+ " is already tied with another register operand");
+ }
+ TiedRegisterPairs.push_back(std::make_pair(DefIdx, I));
+ }
+ // FIXME: Verify that, for non-INLINEASM instructions, the def and use
+ // tied-operand indices are below the maximum number of tied operands.
+ for (const auto &TiedPair : TiedRegisterPairs)
+ MI.tieOperands(TiedPair.first, TiedPair.second);
+ return false;
+}
+
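+/// A few illustrative register operand forms accepted below, inferred from the
+/// parser logic (the flag spellings, register and subregister names are
+/// examples only):
+///   implicit-def dead %eflags      flags followed by a physical register
+///   %1:sub_32bit                   virtual register with a subregister index
+///   %2(tied-def 0)                 register use tied to def operand #0
+///   %3(32)                         generic virtual register with a size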
+bool MIParser::parseRegisterOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx,
+ bool IsDef) {
+ unsigned Reg;
+ unsigned Flags = IsDef ? RegState::Define : 0;
+ while (Token.isRegisterFlag()) {
+ if (parseRegisterFlag(Flags))
+ return true;
+ }
+ if (!Token.isRegister())
+ return error("expected a register after register flags");
+ if (parseRegister(Reg))
+ return true;
+ lex();
+ unsigned SubReg = 0;
+ if (Token.is(MIToken::colon)) {
+ if (parseSubRegisterIndex(SubReg))
+ return true;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("subregister index expects a virtual register");
+ }
+ if ((Flags & RegState::Define) == 0) {
+ if (consumeIfPresent(MIToken::lparen)) {
+ unsigned Idx;
+ if (parseRegisterTiedDefIndex(Idx))
+ return true;
+ TiedDefIdx = Idx;
+ }
+ } else if (consumeIfPresent(MIToken::lparen)) {
+ // Virtual registers may have a size with GlobalISel.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("unexpected size on physical register");
+ unsigned Size;
+ if (parseSize(Size))
+ return true;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setSize(Reg, Size);
+ } else if (PFS.GenericVRegs.count(Reg)) {
+ // Generic virtual registers must have a size.
+ // If we end up here, it means the size hasn't been specified, which is an
+ // error.
+ return error("generic virtual registers must have a size");
+ }
+ Dest = MachineOperand::CreateReg(
+ Reg, Flags & RegState::Define, Flags & RegState::Implicit,
+ Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
+ Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
+ Flags & RegState::InternalRead);
+ return false;
+}
+
+bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::IntegerLiteral));
+ const APSInt &Int = Token.integerValue();
+ if (Int.getMinSignedBits() > 64)
+ return error("integer literal is too large to be an immediate operand");
+ Dest = MachineOperand::CreateImm(Int.getExtValue());
+ lex();
+ return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C) {
+ auto Source = StringValue.str(); // The source has to be null terminated.
+ SMDiagnostic Err;
+ C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+ &PFS.IRSlots);
+ if (!C)
+ return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
+ if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue,
+ unsigned &Read, Type *&Ty) {
+ auto Source = StringValue.str(); // The source has to be null terminated.
+ SMDiagnostic Err;
+ Ty = parseTypeAtBeginning(Source.c_str(), Read, Err,
+ *MF.getFunction()->getParent(), &PFS.IRSlots);
+ if (!Ty)
+ return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return false;
+}
+
+bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty,
+ bool MustBeSized) {
+ // At this point we enter the IR world, i.e., to get the correct type,
+ // we need to hand off the whole string, not just the current token.
+ // E.g., <4 x i64> would give '<' as a token and there is not much
+ // the IR parser can do with that.
+ unsigned Read = 0;
+ if (parseIRType(Loc, StringRef(Loc), Read, Ty))
+ return true;
+ // The type must be sized; otherwise there is not much the backend
+ // can do with it.
+ if (MustBeSized && !Ty->isSized())
+ return error("expected a sized type");
+ // The next token starts Read characters after Loc.
+ // However, the current location is not Loc, but Loc + the length of Token.
+ // Therefore, subtract the length of Token (range().end() - Loc) from the
+ // number of characters to skip before the next token.
+ lex(Read - (Token.range().end() - Loc));
+ return false;
+}
+
+bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::IntegerType));
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C));
+ return false;
+}
+
+bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::FloatingPointLiteral))
+ return error("expected a floating point literal");
+ const Constant *C = nullptr;
+ if (parseIRConstant(Loc, C))
+ return true;
+ Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C));
+ return false;
+}
+
+bool MIParser::getUnsigned(unsigned &Result) {
+ assert(Token.hasIntegerValue() && "Expected a token with an integer value");
+ const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
+ uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
+ if (Val64 == Limit)
+ return error("expected 32-bit integer (too large)");
+ Result = Val64;
+ return false;
+}
+
+bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
+ assert(Token.is(MIToken::MachineBasicBlock) ||
+ Token.is(MIToken::MachineBasicBlockLabel));
+ unsigned Number;
+ if (getUnsigned(Number))
+ return true;
+ auto MBBInfo = PFS.MBBSlots.find(Number);
+ if (MBBInfo == PFS.MBBSlots.end())
+ return error(Twine("use of undefined machine basic block #") +
+ Twine(Number));
+ MBB = MBBInfo->second;
+ if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName())
+ return error(Twine("the name of machine basic block #") + Twine(Number) +
+ " isn't '" + Token.stringValue() + "'");
+ return false;
+}
+
+bool MIParser::parseMBBOperand(MachineOperand &Dest) {
+ MachineBasicBlock *MBB;
+ if (parseMBBReference(MBB))
+ return true;
+ Dest = MachineOperand::CreateMBB(MBB);
+ lex();
+ return false;
+}
+
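+/// A sketch of the stack object reference syntax handled below, inferred from
+/// the parser logic ('%stack.0.x' refers to a hypothetical alloca named 'x'):
+///   %stack.0.x
+/// The trailing name, when present, has to match the name of the allocation
+/// associated with the frame index.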
+bool MIParser::parseStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::StackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.StackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.StackObjectSlots.end())
+ return error(Twine("use of undefined stack object '%stack.") + Twine(ID) +
+ "'");
+ StringRef Name;
+ if (const auto *Alloca =
+ MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+ Name = Alloca->getName();
+ if (!Token.stringValue().empty() && Token.stringValue() != Name)
+ return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
+ "' isn't '" + Token.stringValue() + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseFixedStackFrameIndex(int &FI) {
+ assert(Token.is(MIToken::FixedStackObject));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID);
+ if (ObjectInfo == PFS.FixedStackObjectSlots.end())
+ return error(Twine("use of undefined fixed stack object '%fixed-stack.") +
+ Twine(ID) + "'");
+ lex();
+ FI = ObjectInfo->second;
+ return false;
+}
+
+bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ Dest = MachineOperand::CreateFI(FI);
+ return false;
+}
+
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
+ switch (Token.kind()) {
+ case MIToken::NamedGlobalValue: {
+ const Module *M = MF.getFunction()->getParent();
+ GV = M->getNamedValue(Token.stringValue());
+ if (!GV)
+ return error(Twine("use of undefined global value '") + Token.range() +
+ "'");
+ break;
+ }
+ case MIToken::GlobalValue: {
+ unsigned GVIdx;
+ if (getUnsigned(GVIdx))
+ return true;
+ if (GVIdx >= PFS.IRSlots.GlobalValues.size())
+ return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
+ "'");
+ GV = PFS.IRSlots.GlobalValues[GVIdx];
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be a global value");
+ }
+ return false;
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ lex();
+ Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ConstantPoolItem));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto ConstantInfo = PFS.ConstantPoolSlots.find(ID);
+ if (ConstantInfo == PFS.ConstantPoolSlots.end())
+ return error("use of undefined constant '%const." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::JumpTableIndex));
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID);
+ if (JumpTableEntryInfo == PFS.JumpTableSlots.end())
+ return error("use of undefined jump table '%jump-table." + Twine(ID) + "'");
+ lex();
+ Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second);
+ return false;
+}
+
+bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::ExternalSymbol));
+ const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
+ lex();
+ Dest = MachineOperand::CreateES(Symbol);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
+bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::SubRegisterIndex));
+ StringRef Name = Token.stringValue();
+ unsigned SubRegIndex = getSubRegIndex(Token.stringValue());
+ if (SubRegIndex == 0)
+ return error(Twine("unknown subregister index '") + Name + "'");
+ lex();
+ Dest = MachineOperand::CreateImm(SubRegIndex);
+ return false;
+}
+
+bool MIParser::parseMDNode(MDNode *&Node) {
+ assert(Token.is(MIToken::exclaim));
+ auto Loc = Token.location();
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected metadata id after '!'");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
+ if (NodeInfo == PFS.IRSlots.MetadataNodes.end())
+ return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+ lex();
+ Node = NodeInfo->second.get();
+ return false;
+}
+
+bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
+ MDNode *Node = nullptr;
+ if (parseMDNode(Node))
+ return true;
+ Dest = MachineOperand::CreateMetadata(Node);
+ return false;
+}
+
+bool MIParser::parseCFIOffset(int &Offset) {
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected a cfi offset");
+ if (Token.integerValue().getMinSignedBits() > 32)
+ return error("expected a 32 bit integer (the cfi offset is too large)");
+ Offset = (int)Token.integerValue().getExtValue();
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIRegister(unsigned &Reg) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a cfi register");
+ unsigned LLVMReg;
+ if (parseRegister(LLVMReg))
+ return true;
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true);
+ if (DwarfReg < 0)
+ return error("invalid DWARF register");
+ Reg = (unsigned)DwarfReg;
+ lex();
+ return false;
+}
+
+bool MIParser::parseCFIOperand(MachineOperand &Dest) {
+ auto Kind = Token.kind();
+ lex();
+ auto &MMI = MF.getMMI();
+ int Offset;
+ unsigned Reg;
+ unsigned CFIIndex;
+ switch (Kind) {
+ case MIToken::kw_cfi_same_value:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_offset:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa_register:
+ if (parseCFIRegister(Reg))
+ return true;
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ break;
+ case MIToken::kw_cfi_def_cfa_offset:
+ if (parseCFIOffset(Offset))
+ return true;
+ // NB: MCCFIInstruction::createDefCfaOffset negates the offset.
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+ break;
+ case MIToken::kw_cfi_def_cfa:
+ if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+ parseCFIOffset(Offset))
+ return true;
+ // NB: MCCFIInstruction::createDefCfa negates the offset.
+ CFIIndex =
+ MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+ break;
+ default:
+ // TODO: Parse the other CFI operands.
+ llvm_unreachable("The current token should be a cfi operand");
+ }
+ Dest = MachineOperand::CreateCFIIndex(CFIIndex);
+ return false;
+}
+
+bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRBlock: {
+ BB = dyn_cast_or_null<BasicBlock>(
+ F.getValueSymbolTable().lookup(Token.stringValue()));
+ if (!BB)
+ return error(Twine("use of undefined IR block '") + Token.range() + "'");
+ break;
+ }
+ case MIToken::IRBlock: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F));
+ if (!BB)
+ return error(Twine("use of undefined IR block '%ir-block.") +
+ Twine(SlotNumber) + "'");
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR block reference");
+ }
+ return false;
+}
+
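+/// Illustrative syntax for the blockaddress operand parsed below, inferred
+/// from the parser logic ('@fn' is a hypothetical IR function name):
+///   blockaddress(@fn, %ir-block.0)
+/// optionally followed by a '+' or '-' offset.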
+bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_blockaddress));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected a global value");
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ auto *F = dyn_cast<Function>(GV);
+ if (!F)
+ return error("expected an IR function reference");
+ lex();
+ if (expectAndConsume(MIToken::comma))
+ return true;
+ BasicBlock *BB = nullptr;
+ if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+ return error("expected an IR block reference");
+ if (parseIRBlock(BB, *F))
+ return true;
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
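+/// Illustrative syntax for the target index operand parsed below, inferred
+/// from the parser logic ('some-index' stands for a target-defined name):
+///   target-index(some-index) + 8
+/// The '+'/'-' offset is optional.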
+bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_target_index));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target index");
+ int Index = 0;
+ if (getTargetIndex(Token.stringValue(), Index))
+ return error("use of undefined target index '" + Token.stringValue() + "'");
+ lex();
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0);
+ if (parseOperandsOffset(Dest))
+ return true;
+ return false;
+}
+
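+/// Illustrative syntax for the liveout register mask operand parsed below,
+/// inferred from the parser logic (the register names are examples only):
+///   liveout(%eax, %edx)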
+bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_liveout));
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg = 0;
+ if (parseRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegLiveOut(Mask);
+ return false;
+}
+
+bool MIParser::parseMachineOperand(MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx) {
+ switch (Token.kind()) {
+ case MIToken::kw_implicit:
+ case MIToken::kw_implicit_define:
+ case MIToken::kw_def:
+ case MIToken::kw_dead:
+ case MIToken::kw_killed:
+ case MIToken::kw_undef:
+ case MIToken::kw_internal:
+ case MIToken::kw_early_clobber:
+ case MIToken::kw_debug_use:
+ case MIToken::underscore:
+ case MIToken::NamedRegister:
+ case MIToken::VirtualRegister:
+ return parseRegisterOperand(Dest, TiedDefIdx);
+ case MIToken::IntegerLiteral:
+ return parseImmediateOperand(Dest);
+ case MIToken::IntegerType:
+ return parseTypedImmediateOperand(Dest);
+ case MIToken::kw_half:
+ case MIToken::kw_float:
+ case MIToken::kw_double:
+ case MIToken::kw_x86_fp80:
+ case MIToken::kw_fp128:
+ case MIToken::kw_ppc_fp128:
+ return parseFPImmediateOperand(Dest);
+ case MIToken::MachineBasicBlock:
+ return parseMBBOperand(Dest);
+ case MIToken::StackObject:
+ return parseStackObjectOperand(Dest);
+ case MIToken::FixedStackObject:
+ return parseFixedStackObjectOperand(Dest);
+ case MIToken::GlobalValue:
+ case MIToken::NamedGlobalValue:
+ return parseGlobalAddressOperand(Dest);
+ case MIToken::ConstantPoolItem:
+ return parseConstantPoolIndexOperand(Dest);
+ case MIToken::JumpTableIndex:
+ return parseJumpTableIndexOperand(Dest);
+ case MIToken::ExternalSymbol:
+ return parseExternalSymbolOperand(Dest);
+ case MIToken::SubRegisterIndex:
+ return parseSubRegisterIndexOperand(Dest);
+ case MIToken::exclaim:
+ return parseMetadataOperand(Dest);
+ case MIToken::kw_cfi_same_value:
+ case MIToken::kw_cfi_offset:
+ case MIToken::kw_cfi_def_cfa_register:
+ case MIToken::kw_cfi_def_cfa_offset:
+ case MIToken::kw_cfi_def_cfa:
+ return parseCFIOperand(Dest);
+ case MIToken::kw_blockaddress:
+ return parseBlockAddressOperand(Dest);
+ case MIToken::kw_target_index:
+ return parseTargetIndexOperand(Dest);
+ case MIToken::kw_liveout:
+ return parseLiveoutRegisterMaskOperand(Dest);
+ case MIToken::Error:
+ return true;
+ case MIToken::Identifier:
+ if (const auto *RegMask = getRegMask(Token.stringValue())) {
+ Dest = MachineOperand::CreateRegMask(RegMask);
+ lex();
+ break;
+ }
+ // fallthrough
+ default:
+ // FIXME: Parse the MCSymbol machine operand.
+ return error("expected a machine operand");
+ }
+ return false;
+}
+
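+/// A sketch of the target flags wrapper syntax handled below, inferred from
+/// the parser logic ('flag-a' and 'flag-b' stand for target-defined flag
+/// names):
+///   target-flags(flag-a, flag-b) @symbol
+/// A direct flag may come first, followed by any number of bitmask flags; the
+/// wrapped operand must not be a register operand.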
+bool MIParser::parseMachineOperandAndTargetFlags(
+ MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+ unsigned TF = 0;
+ bool HasTargetFlags = false;
+ if (Token.is(MIToken::kw_target_flags)) {
+ HasTargetFlags = true;
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ if (getDirectTargetFlag(Token.stringValue(), TF)) {
+ if (getBitmaskTargetFlag(Token.stringValue(), TF))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ }
+ lex();
+ while (Token.is(MIToken::comma)) {
+ lex();
+ if (Token.isNot(MIToken::Identifier))
+ return error("expected the name of the target flag");
+ unsigned BitFlag = 0;
+ if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+ return error("use of undefined target flag '" + Token.stringValue() +
+ "'");
+ // TODO: Report an error when using a duplicate bit target flag.
+ TF |= BitFlag;
+ lex();
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ }
+ auto Loc = Token.location();
+ if (parseMachineOperand(Dest, TiedDefIdx))
+ return true;
+ if (!HasTargetFlags)
+ return false;
+ if (Dest.isReg())
+ return error(Loc, "register operands can't have target flags");
+ Dest.setTargetFlags(TF);
+ return false;
+}
+
+bool MIParser::parseOffset(int64_t &Offset) {
+ if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus))
+ return false;
+ StringRef Sign = Token.range();
+ bool IsNegative = Token.is(MIToken::minus);
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after '" + Sign + "'");
+ if (Token.integerValue().getMinSignedBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Offset = Token.integerValue().getExtValue();
+ if (IsNegative)
+ Offset = -Offset;
+ lex();
+ return false;
+}
+
+bool MIParser::parseAlignment(unsigned &Alignment) {
+ assert(Token.is(MIToken::kw_align));
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+ return error("expected an integer literal after 'align'");
+ if (getUnsigned(Alignment))
+ return true;
+ lex();
+ return false;
+}
+
+bool MIParser::parseOperandsOffset(MachineOperand &Op) {
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Op.setOffset(Offset);
+ return false;
+}
+
+bool MIParser::parseIRValue(const Value *&V) {
+ switch (Token.kind()) {
+ case MIToken::NamedIRValue: {
+ V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+ break;
+ }
+ case MIToken::IRValue: {
+ unsigned SlotNumber = 0;
+ if (getUnsigned(SlotNumber))
+ return true;
+ V = getIRValue(SlotNumber);
+ break;
+ }
+ case MIToken::NamedGlobalValue:
+ case MIToken::GlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ V = GV;
+ break;
+ }
+ case MIToken::QuotedIRValue: {
+ const Constant *C = nullptr;
+ if (parseIRConstant(Token.location(), Token.stringValue(), C))
+ return true;
+ V = C;
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be an IR block reference");
+ }
+ if (!V)
+ return error(Twine("use of undefined IR value '") + Token.range() + "'");
+ return false;
+}
+
+bool MIParser::getUint64(uint64_t &Result) {
+ assert(Token.hasIntegerValue());
+ if (Token.integerValue().getActiveBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = Token.integerValue().getZExtValue();
+ return false;
+}
+
+bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
+ const auto OldFlags = Flags;
+ switch (Token.kind()) {
+ case MIToken::kw_volatile:
+ Flags |= MachineMemOperand::MOVolatile;
+ break;
+ case MIToken::kw_non_temporal:
+ Flags |= MachineMemOperand::MONonTemporal;
+ break;
+ case MIToken::kw_invariant:
+ Flags |= MachineMemOperand::MOInvariant;
+ break;
+ // TODO: Parse the target-specific memory operand flags.
+ default:
+ llvm_unreachable("The current token should be a memory operand flag");
+ }
+ if (OldFlags == Flags)
+ // If the flags weren't modified, the same flag must have been specified
+ // more than once.
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag");
+ lex();
+ return false;
+}
+
+bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
+ switch (Token.kind()) {
+ case MIToken::kw_stack:
+ PSV = MF.getPSVManager().getStack();
+ break;
+ case MIToken::kw_got:
+ PSV = MF.getPSVManager().getGOT();
+ break;
+ case MIToken::kw_jump_table:
+ PSV = MF.getPSVManager().getJumpTable();
+ break;
+ case MIToken::kw_constant_pool:
+ PSV = MF.getPSVManager().getConstantPool();
+ break;
+ case MIToken::FixedStackObject: {
+ int FI;
+ if (parseFixedStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
+ case MIToken::StackObject: {
+ int FI;
+ if (parseStackFrameIndex(FI))
+ return true;
+ PSV = MF.getPSVManager().getFixedStack(FI);
+ // The token was already consumed, so use return here instead of break.
+ return false;
+ }
+ case MIToken::kw_call_entry: {
+ lex();
+ switch (Token.kind()) {
+ case MIToken::GlobalValue:
+ case MIToken::NamedGlobalValue: {
+ GlobalValue *GV = nullptr;
+ if (parseGlobalValue(GV))
+ return true;
+ PSV = MF.getPSVManager().getGlobalValueCallEntry(GV);
+ break;
+ }
+ case MIToken::ExternalSymbol:
+ PSV = MF.getPSVManager().getExternalSymbolCallEntry(
+ MF.createExternalSymbolName(Token.stringValue()));
+ break;
+ default:
+ return error(
+ "expected a global value or an external symbol after 'call-entry'");
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("The current token should be pseudo source value");
+ }
+ lex();
+ return false;
+}
+
+bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
+ if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
+ Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
+ Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
+ Token.is(MIToken::kw_call_entry)) {
+ const PseudoSourceValue *PSV = nullptr;
+ if (parseMemoryPseudoSourceValue(PSV))
+ return true;
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(PSV, Offset);
+ return false;
+ }
+ if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) &&
+ Token.isNot(MIToken::GlobalValue) &&
+ Token.isNot(MIToken::NamedGlobalValue) &&
+ Token.isNot(MIToken::QuotedIRValue))
+ return error("expected an IR value reference");
+ const Value *V = nullptr;
+ if (parseIRValue(V))
+ return true;
+ if (!V->getType()->isPointerTy())
+ return error("expected a pointer IR value");
+ lex();
+ int64_t Offset = 0;
+ if (parseOffset(Offset))
+ return true;
+ Dest = MachinePointerInfo(V, Offset);
+ return false;
+}
+
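+/// A sketch of the machine memory operand syntax handled below, inferred from
+/// the parser logic (the IR value and metadata ids are illustrative):
+///   (volatile load 4 from %ir.ptr, align 8, !tbaa !2)
+/// i.e. optional flags, 'load' or 'store', the access size in bytes, an
+/// optional 'from'/'into' pointer description, and optional alignment and
+/// metadata attachments.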
+bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+ while (Token.isMemoryOperandFlag()) {
+ if (parseMemoryOperandFlag(Flags))
+ return true;
+ }
+ if (Token.isNot(MIToken::Identifier) ||
+ (Token.stringValue() != "load" && Token.stringValue() != "store"))
+ return error("expected 'load' or 'store' memory operation");
+ if (Token.stringValue() == "load")
+ Flags |= MachineMemOperand::MOLoad;
+ else
+ Flags |= MachineMemOperand::MOStore;
+ lex();
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected the size integer literal after memory operation");
+ uint64_t Size;
+ if (getUint64(Size))
+ return true;
+ lex();
+
+ MachinePointerInfo Ptr = MachinePointerInfo();
+ if (Token.is(MIToken::Identifier)) {
+ const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into";
+ if (Token.stringValue() != Word)
+ return error(Twine("expected '") + Word + "'");
+ lex();
+
+ if (parseMachinePointerInfo(Ptr))
+ return true;
+ }
+ unsigned BaseAlignment = Size;
+ AAMDNodes AAInfo;
+ MDNode *Range = nullptr;
+ while (consumeIfPresent(MIToken::comma)) {
+ switch (Token.kind()) {
+ case MIToken::kw_align:
+ if (parseAlignment(BaseAlignment))
+ return true;
+ break;
+ case MIToken::md_tbaa:
+ lex();
+ if (parseMDNode(AAInfo.TBAA))
+ return true;
+ break;
+ case MIToken::md_alias_scope:
+ lex();
+ if (parseMDNode(AAInfo.Scope))
+ return true;
+ break;
+ case MIToken::md_noalias:
+ lex();
+ if (parseMDNode(AAInfo.NoAlias))
+ return true;
+ break;
+ case MIToken::md_range:
+ lex();
+ if (parseMDNode(Range))
+ return true;
+ break;
+ // TODO: Report an error on duplicate metadata nodes.
+ default:
+ return error("expected 'align' or '!tbaa' or '!alias.scope' or "
+ "'!noalias' or '!range'");
+ }
+ }
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest =
+ MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range);
+ return false;
+}
+
+void MIParser::initNames2InstrOpCodes() {
+ if (!Names2InstrOpCodes.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
+ Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
+}
+
+bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) {
+ initNames2InstrOpCodes();
+ auto InstrInfo = Names2InstrOpCodes.find(InstrName);
+ if (InstrInfo == Names2InstrOpCodes.end())
+ return true;
+ OpCode = InstrInfo->getValue();
+ return false;
+}
+
+void MIParser::initNames2Regs() {
+ if (!Names2Regs.empty())
+ return;
+ // The '%noreg' register is register 0.
+ Names2Regs.insert(std::make_pair("noreg", 0));
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+ bool WasInserted =
+ Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+ .second;
+ (void)WasInserted;
+ assert(WasInserted && "Expected registers to be unique case-insensitively");
+ }
+}
+
+bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
+ initNames2Regs();
+ auto RegInfo = Names2Regs.find(RegName);
+ if (RegInfo == Names2Regs.end())
+ return true;
+ Reg = RegInfo->getValue();
+ return false;
+}
+
+void MIParser::initNames2RegMasks() {
+ if (!Names2RegMasks.empty())
+ return;
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
+ ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
+ assert(RegMasks.size() == RegMaskNames.size());
+ for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
+ Names2RegMasks.insert(
+ std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
+}
+
+const uint32_t *MIParser::getRegMask(StringRef Identifier) {
+ initNames2RegMasks();
+ auto RegMaskInfo = Names2RegMasks.find(Identifier);
+ if (RegMaskInfo == Names2RegMasks.end())
+ return nullptr;
+ return RegMaskInfo->getValue();
+}
+
+void MIParser::initNames2SubRegIndices() {
+ if (!Names2SubRegIndices.empty())
+ return;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
+ Names2SubRegIndices.insert(
+ std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I));
+}
+
+unsigned MIParser::getSubRegIndex(StringRef Name) {
+ initNames2SubRegIndices();
+ auto SubRegInfo = Names2SubRegIndices.find(Name);
+ if (SubRegInfo == Names2SubRegIndices.end())
+ return 0;
+ return SubRegInfo->getValue();
+}
+
+static void initSlots2BasicBlocks(
+ const Function &F,
+ DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (auto &BB : F) {
+ if (BB.hasName())
+ continue;
+ int Slot = MST.getLocalSlot(&BB);
+ if (Slot == -1)
+ continue;
+ Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB));
+ }
+}
+
+static const BasicBlock *getIRBlockFromSlot(
+ unsigned Slot,
+ const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
+ auto BlockInfo = Slots2BasicBlocks.find(Slot);
+ if (BlockInfo == Slots2BasicBlocks.end())
+ return nullptr;
+ return BlockInfo->second;
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
+ if (Slots2BasicBlocks.empty())
+ initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, Slots2BasicBlocks);
+}
+
+const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
+ if (&F == MF.getFunction())
+ return getIRBlock(Slot);
+ DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks;
+ initSlots2BasicBlocks(F, CustomSlots2BasicBlocks);
+ return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
+}
+
+static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ int Slot = MST.getLocalSlot(V);
+ if (Slot == -1)
+ return;
+ Slots2Values.insert(std::make_pair(unsigned(Slot), V));
+}
+
+/// Creates the mapping from slot numbers to the function's unnamed IR values.
+static void initSlots2Values(const Function &F,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &Arg : F.args())
+ mapValueToSlot(&Arg, MST, Slots2Values);
+ for (const auto &BB : F) {
+ mapValueToSlot(&BB, MST, Slots2Values);
+ for (const auto &I : BB)
+ mapValueToSlot(&I, MST, Slots2Values);
+ }
+}
+
+const Value *MIParser::getIRValue(unsigned Slot) {
+ if (Slots2Values.empty())
+ initSlots2Values(*MF.getFunction(), Slots2Values);
+ auto ValueInfo = Slots2Values.find(Slot);
+ if (ValueInfo == Slots2Values.end())
+ return nullptr;
+ return ValueInfo->second;
+}
+
+void MIParser::initNames2TargetIndices() {
+ if (!Names2TargetIndices.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices)
+ Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getTargetIndex(StringRef Name, int &Index) {
+ initNames2TargetIndices();
+ auto IndexInfo = Names2TargetIndices.find(Name);
+ if (IndexInfo == Names2TargetIndices.end())
+ return true;
+ Index = IndexInfo->second;
+ return false;
+}
+
+void MIParser::initNames2DirectTargetFlags() {
+ if (!Names2DirectTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2DirectTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
+ initNames2DirectTargetFlags();
+ auto FlagInfo = Names2DirectTargetFlags.find(Name);
+ if (FlagInfo == Names2DirectTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void MIParser::initNames2BitmaskTargetFlags() {
+ if (!Names2BitmaskTargetFlags.empty())
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2BitmaskTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
+ initNames2BitmaskTargetFlags();
+ auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+ if (FlagInfo == Names2BitmaskTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+ StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseBasicBlocks();
+}
+
+bool llvm::parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB);
+}
+
+bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg);
+}
+
+bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg);
+}
+
+bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS,
+ int &FI, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI);
+}
+
+bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS,
+ MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
+}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
new file mode 100644
index 000000000000..18895b9e54eb
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -0,0 +1,99 @@
+//===- MIParser.h - Machine Instructions Parser ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that parse the machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace llvm {
+
+class StringRef;
+class BasicBlock;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+struct SlotMapping;
+class SMDiagnostic;
+class SourceMgr;
+
+struct PerFunctionMIParsingState {
+ MachineFunction &MF;
+ SourceMgr *SM;
+ const SlotMapping &IRSlots;
+
+ DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
+ DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+ DenseMap<unsigned, int> FixedStackObjectSlots;
+ DenseMap<unsigned, int> StackObjectSlots;
+ DenseMap<unsigned, unsigned> ConstantPoolSlots;
+ DenseMap<unsigned, unsigned> JumpTableSlots;
+ /// Hold the generic virtual registers.
+ SmallSet<unsigned, 8> GenericVRegs;
+
+ PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
+ const SlotMapping &IRSlots);
+};
+
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because not
+/// all of the machine basic blocks are defined yet, which makes it impossible
+/// to resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(const PerFunctionMIParsingState &PFS,
+ StringRef Src, SMDiagnostic &Error);
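+
+// An illustrative two-pass usage sketch (editorial example, not part of the
+// original header); error reporting by the caller is elided:
+//
+//   PerFunctionMIParsingState PFS(MF, SM, IRSlots);
+//   SMDiagnostic Error;
+//   if (parseMachineBasicBlockDefinitions(PFS, Src, Error) ||
+//       parseMachineInstructions(PFS, Src, Error))
+//     /* report Error through the surrounding diagnostic machinery */;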
+
+bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseStackObjectReference(const PerFunctionMIParsingState &PFS,
+ int &FI, StringRef Src, SMDiagnostic &Error);
+
+bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
+ StringRef Src, SMDiagnostic &Error);
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
new file mode 100644
index 000000000000..4aa3df6326e9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -0,0 +1,789 @@
+//===- MIRParser.cpp - MIR serialization format parser implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that parses the optional LLVM IR and machine
+// functions that are stored in MIR files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "MIParser.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <memory>
+
+using namespace llvm;
+
+namespace llvm {
+
+/// This class implements the parsing of LLVM IR that's embedded inside a MIR
+/// file.
+class MIRParserImpl {
+ SourceMgr SM;
+ StringRef Filename;
+ LLVMContext &Context;
+ StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
+ SlotMapping IRSlots;
+ /// Maps from register class names to register classes.
+ StringMap<const TargetRegisterClass *> Names2RegClasses;
+ /// Maps from register bank names to register banks.
+ StringMap<const RegisterBank *> Names2RegBanks;
+
+public:
+ MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
+ LLVMContext &Context);
+
+ void reportDiagnostic(const SMDiagnostic &Diag);
+
+ /// Report an error with the given message at an unknown location.
+ ///
+ /// Always returns true.
+ bool error(const Twine &Message);
+
+ /// Report an error with the given message at the given location.
+ ///
+ /// Always returns true.
+ bool error(SMLoc Loc, const Twine &Message);
+
+ /// Report a given error with the location translated from the location in an
+ /// embedded string literal to a location in the MIR file.
+ ///
+ /// Always returns true.
+ bool error(const SMDiagnostic &Error, SMRange SourceRange);
+
+ /// Try to parse the optional LLVM module and the machine functions in the MIR
+ /// file.
+ ///
+ /// Return null if an error occurred.
+ std::unique_ptr<Module> parse();
+
+ /// Parse the machine function in the current YAML document.
+ ///
+ /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR.
+ /// A dummy IR function is created and inserted into the given module when
+ /// this parameter is true.
+ ///
+ /// Return true if an error occurred.
+ bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR);
+
+ /// Initialize the machine function to the state that's described in the MIR
+ /// file.
+ ///
+ /// Return true if an error occurred.
+ bool initializeMachineFunction(MachineFunction &MF);
+
+ bool initializeRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ void inferRegisterInfo(const PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
+ bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource,
+ int FrameIdx);
+
+ bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineStackObject &Object,
+ int FrameIdx);
+
+ bool initializeConstantPool(PerFunctionMIParsingState &PFS,
+ MachineConstantPool &ConstantPool,
+ const yaml::MachineFunction &YamlMF);
+
+ bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineJumpTable &YamlJTI);
+
+private:
+ bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
+ const yaml::StringValue &Source);
+
+ bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source);
+
+ /// Return a MIR diagnostic converted from an MI string diagnostic.
+ SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
+
+ /// Return a MIR diagnostic converted from a diagnostic located in a YAML
+ /// block scalar string.
+ SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange);
+
+ /// Create an empty function with the given name.
+ void createDummyFunction(StringRef Name, Module &M);
+
+ void initNames2RegClasses(const MachineFunction &MF);
+ void initNames2RegBanks(const MachineFunction &MF);
+
+ /// Check if the given identifier is a name of a register class.
+ ///
+ /// Return null if the name isn't a register class.
+ const TargetRegisterClass *getRegClass(const MachineFunction &MF,
+ StringRef Name);
+
+ /// Check if the given identifier is a name of a register bank.
+ ///
+ /// Return null if the name isn't a register bank.
+ const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
+};
+
+} // end namespace llvm
+
+MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
+ StringRef Filename, LLVMContext &Context)
+ : SM(), Filename(Filename), Context(Context) {
+ SM.AddNewSourceBuffer(std::move(Contents), SMLoc());
+}
+
+bool MIRParserImpl::error(const Twine &Message) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error, SMDiagnostic(Filename, SourceMgr::DK_Error, Message.str())));
+ return true;
+}
+
+bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message)));
+ return true;
+}
+
+bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) {
+ assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error");
+ reportDiagnostic(diagFromMIStringDiag(Error, SourceRange));
+ return true;
+}
+
+void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) {
+ DiagnosticSeverity Kind;
+ switch (Diag.getKind()) {
+ case SourceMgr::DK_Error:
+ Kind = DS_Error;
+ break;
+ case SourceMgr::DK_Warning:
+ Kind = DS_Warning;
+ break;
+ case SourceMgr::DK_Note:
+ Kind = DS_Note;
+ break;
+ }
+ Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag));
+}
+
+static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
+ reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag);
+}
+
+std::unique_ptr<Module> MIRParserImpl::parse() {
+ yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(),
+ /*Ctxt=*/nullptr, handleYAMLDiag, this);
+ In.setContext(&In);
+
+ if (!In.setCurrentDocument()) {
+ if (In.error())
+ return nullptr;
+ // Create an empty module when the MIR file is empty.
+ return llvm::make_unique<Module>(Filename, Context);
+ }
+
+ std::unique_ptr<Module> M;
+ bool NoLLVMIR = false;
+ // Parse the block scalar manually so that we can return a unique pointer
+ // without having to go through YAML traits.
+ if (const auto *BSN =
+ dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) {
+ SMDiagnostic Error;
+ M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
+ Context, &IRSlots);
+ if (!M) {
+ reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
+ return nullptr;
+ }
+ In.nextDocument();
+ if (!In.setCurrentDocument())
+ return M;
+ } else {
+ // Create a new, empty module.
+ M = llvm::make_unique<Module>(Filename, Context);
+ NoLLVMIR = true;
+ }
+
+ // Parse the machine functions.
+ do {
+ if (parseMachineFunction(In, *M, NoLLVMIR))
+ return nullptr;
+ In.nextDocument();
+ } while (In.setCurrentDocument());
+
+ return M;
+}
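+
+// For reference, a minimal MIR input accepted by parse() has this rough shape
+// (illustrative sketch; field names follow the YAML mapping, and the machine
+// instruction is a placeholder):
+//
+//   --- |
+//     define i32 @foo() {
+//       ret i32 0
+//     }
+//   ...
+//   ---
+//   name: foo
+//   body: |
+//     bb.0:
+//       <machine instructions>
+//   ...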
+
+bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M,
+ bool NoLLVMIR) {
+ auto MF = llvm::make_unique<yaml::MachineFunction>();
+ yaml::yamlize(In, *MF, false);
+ if (In.error())
+ return true;
+ auto FunctionName = MF->Name;
+ if (Functions.find(FunctionName) != Functions.end())
+ return error(Twine("redefinition of machine function '") + FunctionName +
+ "'");
+ Functions.insert(std::make_pair(FunctionName, std::move(MF)));
+ if (NoLLVMIR)
+ createDummyFunction(FunctionName, M);
+ else if (!M.getFunction(FunctionName))
+ return error(Twine("function '") + FunctionName +
+ "' isn't defined in the provided LLVM IR");
+ return false;
+}
+
+void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
+ auto &Context = M.getContext();
+ Function *F = cast<Function>(M.getOrInsertFunction(
+ Name, FunctionType::get(Type::getVoidTy(Context), false)));
+ BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
+ new UnreachableInst(Context, BB);
+}
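+
+// The dummy function created above corresponds to the following LLVM IR, with
+// the requested name substituted for <name>:
+//
+//   define void @<name>() {
+//   entry:
+//     unreachable
+//   }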
+
+bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
+ auto It = Functions.find(MF.getName());
+ if (It == Functions.end())
+ return error(Twine("no machine function information for function '") +
+ MF.getName() + "' in the MIR file");
+ // TODO: Recreate the machine function.
+ const yaml::MachineFunction &YamlMF = *It->getValue();
+ if (YamlMF.Alignment)
+ MF.setAlignment(YamlMF.Alignment);
+ MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ MF.setHasInlineAsm(YamlMF.HasInlineAsm);
+ if (YamlMF.AllVRegsAllocated)
+ MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots);
+ if (initializeRegisterInfo(PFS, YamlMF))
+ return true;
+ if (!YamlMF.Constants.empty()) {
+ auto *ConstantPool = MF.getConstantPool();
+ assert(ConstantPool && "Constant pool must be created");
+ if (initializeConstantPool(PFS, *ConstantPool, YamlMF))
+ return true;
+ }
+
+ StringRef BlockStr = YamlMF.Body.Value.Value;
+ SMDiagnostic Error;
+ SourceMgr BlockSM;
+ BlockSM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(BlockStr, "",/*RequiresNullTerminator=*/false),
+ SMLoc());
+ PFS.SM = &BlockSM;
+ if (parseMachineBasicBlockDefinitions(PFS, BlockStr, Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
+ }
+ PFS.SM = &SM;
+
+ if (MF.empty())
+ return error(Twine("machine function '") + Twine(MF.getName()) +
+ "' requires at least one machine basic block in its body");
+ // Initialize the frame information after creating all the MBBs so that the
+ // MBB references in the frame information can be resolved.
+ if (initializeFrameInfo(PFS, YamlMF))
+ return true;
+ // Initialize the jump table after creating all the MBBs so that the MBB
+ // references can be resolved.
+ if (!YamlMF.JumpTableInfo.Entries.empty() &&
+ initializeJumpTableInfo(PFS, YamlMF.JumpTableInfo))
+ return true;
+ // Parse the machine instructions after creating all of the MBBs so that the
+ // parser can resolve the MBB references.
+ StringRef InsnStr = YamlMF.Body.Value.Value;
+ SourceMgr InsnSM;
+ InsnSM.AddNewSourceBuffer(
+ MemoryBuffer::getMemBuffer(InsnStr, "", /*RequiresNullTerminator=*/false),
+ SMLoc());
+ PFS.SM = &InsnSM;
+ if (parseMachineInstructions(PFS, InsnStr, Error)) {
+ reportDiagnostic(
+ diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+ return true;
+ }
+ PFS.SM = &SM;
+
+ inferRegisterInfo(PFS, YamlMF);
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MF.getRegInfo().freezeReservedRegs(MF);
+ MF.verify();
+ return false;
+}
+
+bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ assert(RegInfo.isSSA());
+ if (!YamlMF.IsSSA)
+ RegInfo.leaveSSA();
+ assert(RegInfo.tracksLiveness());
+ if (!YamlMF.TracksRegLiveness)
+ RegInfo.invalidateLiveness();
+ RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
+
+ SMDiagnostic Error;
+ // Parse the virtual register information.
+ for (const auto &VReg : YamlMF.VirtualRegisters) {
+ unsigned Reg;
+ if (StringRef(VReg.Class.Value).equals("_")) {
+ // This is a generic virtual register.
+ // The size will be set appropriately when we reach the definition.
+ Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
+ PFS.GenericVRegs.insert(Reg);
+ } else {
+ const auto *RC = getRegClass(MF, VReg.Class.Value);
+ if (RC) {
+ Reg = RegInfo.createVirtualRegister(RC);
+ } else {
+ const auto *RegBank = getRegBank(MF, VReg.Class.Value);
+ if (!RegBank)
+ return error(
+ VReg.Class.SourceRange.Start,
+ Twine("use of undefined register class or register bank '") +
+ VReg.Class.Value + "'");
+ Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
+ RegInfo.setRegBank(Reg, *RegBank);
+ PFS.GenericVRegs.insert(Reg);
+ }
+ }
+ if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg))
+ .second)
+ return error(VReg.ID.SourceRange.Start,
+ Twine("redefinition of virtual register '%") +
+ Twine(VReg.ID.Value) + "'");
+ if (!VReg.PreferredRegister.Value.empty()) {
+ unsigned PreferredReg = 0;
+ if (parseNamedRegisterReference(PFS, PreferredReg,
+ VReg.PreferredRegister.Value, Error))
+ return error(Error, VReg.PreferredRegister.SourceRange);
+ RegInfo.setSimpleHint(Reg, PreferredReg);
+ }
+ }
+
+ // Parse the liveins.
+ for (const auto &LiveIn : YamlMF.LiveIns) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error))
+ return error(Error, LiveIn.Register.SourceRange);
+ unsigned VReg = 0;
+ if (!LiveIn.VirtualRegister.Value.empty()) {
+ if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value,
+ Error))
+ return error(Error, LiveIn.VirtualRegister.SourceRange);
+ }
+ RegInfo.addLiveIn(Reg, VReg);
+ }
+
+ // Parse the callee saved register mask.
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size());
+ if (!YamlMF.CalleeSavedRegisters)
+ return false;
+ for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisterMask[Reg] = true;
+ }
+ RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip());
+ return false;
+}
+
+void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ if (YamlMF.CalleeSavedRegisters)
+ return;
+ MachineRegisterInfo &MRI = PFS.MF.getRegInfo();
+ for (const MachineBasicBlock &MBB : PFS.MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
+ }
+ }
+ }
+}
+
+bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const Function &F = *MF.getFunction();
+ const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
+ MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
+ MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken);
+ MFI.setHasStackMap(YamlMFI.HasStackMap);
+ MFI.setHasPatchPoint(YamlMFI.HasPatchPoint);
+ MFI.setStackSize(YamlMFI.StackSize);
+ MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment);
+ if (YamlMFI.MaxAlignment)
+ MFI.ensureMaxAlignment(YamlMFI.MaxAlignment);
+ MFI.setAdjustsStack(YamlMFI.AdjustsStack);
+ MFI.setHasCalls(YamlMFI.HasCalls);
+ MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize);
+ MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment);
+ MFI.setHasVAStart(YamlMFI.HasVAStart);
+ MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc);
+ if (!YamlMFI.SavePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint))
+ return true;
+ MFI.setSavePoint(MBB);
+ }
+ if (!YamlMFI.RestorePoint.Value.empty()) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(PFS, MBB, YamlMFI.RestorePoint))
+ return true;
+ MFI.setRestorePoint(MBB);
+ }
+
+ std::vector<CalleeSavedInfo> CSIInfo;
+ // Initialize the fixed frame objects.
+ for (const auto &Object : YamlMF.FixedStackObjects) {
+ int ObjectIdx;
+ if (Object.Type != yaml::FixedMachineStackObject::SpillSlot)
+ ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset,
+ Object.IsImmutable, Object.IsAliased);
+ else
+ ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
+ MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+ if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
+ ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of fixed stack object '%fixed-stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
+ ObjectIdx))
+ return true;
+ }
+
+ // Initialize the ordinary frame objects.
+ for (const auto &Object : YamlMF.StackObjects) {
+ int ObjectIdx;
+ const AllocaInst *Alloca = nullptr;
+ const yaml::StringValue &Name = Object.Name;
+ if (!Name.Value.empty()) {
+ Alloca = dyn_cast_or_null<AllocaInst>(
+ F.getValueSymbolTable().lookup(Name.Value));
+ if (!Alloca)
+ return error(Name.SourceRange.Start,
+ "alloca instruction named '" + Name.Value +
+ "' isn't defined in the function '" + F.getName() +
+ "'");
+ }
+ if (Object.Type == yaml::MachineStackObject::VariableSized)
+ ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
+ else
+ ObjectIdx = MFI.CreateStackObject(
+ Object.Size, Object.Alignment,
+ Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
+ MFI.setObjectOffset(ObjectIdx, Object.Offset);
+ if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
+ .second)
+ return error(Object.ID.SourceRange.Start,
+ Twine("redefinition of stack object '%stack.") +
+ Twine(Object.ID.Value) + "'");
+ if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister,
+ ObjectIdx))
+ return true;
+ if (Object.LocalOffset)
+ MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
+ if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
+ return true;
+ }
+ MFI.setCalleeSavedInfo(CSIInfo);
+ if (!CSIInfo.empty())
+ MFI.setCalleeSavedInfoValid(true);
+
+ // Initialize the various stack object references after initializing the
+ // stack objects.
+ if (!YamlMFI.StackProtector.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(PFS, FI, YamlMFI.StackProtector.Value, Error))
+ return error(Error, YamlMFI.StackProtector.SourceRange);
+ MFI.setStackProtectorIndex(FI);
+ }
+ return false;
+}
+
+bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
+ std::vector<CalleeSavedInfo> &CSIInfo,
+ const yaml::StringValue &RegisterSource, int FrameIdx) {
+ if (RegisterSource.Value.empty())
+ return false;
+ unsigned Reg = 0;
+ SMDiagnostic Error;
+ if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error))
+ return error(Error, RegisterSource.SourceRange);
+ CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx));
+ return false;
+}
+
+/// Verify that given node is of a certain type. Return true on error.
+template <typename T>
+static bool typecheckMDNode(T *&Result, MDNode *Node,
+ const yaml::StringValue &Source,
+ StringRef TypeString, MIRParserImpl &Parser) {
+ if (!Node)
+ return false;
+ Result = dyn_cast<T>(Node);
+ if (!Result)
+ return Parser.error(Source.SourceRange.Start,
+ "expected a reference to a '" + TypeString +
+ "' metadata node");
+ return false;
+}
+
+bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineStackObject &Object, int FrameIdx) {
+ // Debug information can only be attached to stack objects; fixed stack
+ // objects aren't supported.
+ assert(FrameIdx >= 0 && "Expected a stack object frame index");
+ MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
+ if (parseMDNode(PFS, Var, Object.DebugVar) ||
+ parseMDNode(PFS, Expr, Object.DebugExpr) ||
+ parseMDNode(PFS, Loc, Object.DebugLoc))
+ return true;
+ if (!Var && !Expr && !Loc)
+ return false;
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) ||
+ typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
+ typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
+ return true;
+ PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ return false;
+}
+
+bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS,
+ MDNode *&Node, const yaml::StringValue &Source) {
+ if (Source.Value.empty())
+ return false;
+ SMDiagnostic Error;
+ if (llvm::parseMDNode(PFS, Node, Source.Value, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
+ MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) {
+ DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots;
+ const MachineFunction &MF = PFS.MF;
+ const auto &M = *MF.getFunction()->getParent();
+ SMDiagnostic Error;
+ for (const auto &YamlConstant : YamlMF.Constants) {
+ const Constant *Value = dyn_cast_or_null<Constant>(
+ parseConstantValue(YamlConstant.Value.Value, Error, M));
+ if (!Value)
+ return error(Error, YamlConstant.Value.SourceRange);
+ unsigned Alignment =
+ YamlConstant.Alignment
+ ? YamlConstant.Alignment
+ : M.getDataLayout().getPrefTypeAlignment(Value->getType());
+ unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
+ if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
+ .second)
+ return error(YamlConstant.ID.SourceRange.Start,
+ Twine("redefinition of constant pool item '%const.") +
+ Twine(YamlConstant.ID.Value) + "'");
+ }
+ return false;
+}
+
+bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineJumpTable &YamlJTI) {
+ MachineJumpTableInfo *JTI = PFS.MF.getOrCreateJumpTableInfo(YamlJTI.Kind);
+ for (const auto &Entry : YamlJTI.Entries) {
+ std::vector<MachineBasicBlock *> Blocks;
+ for (const auto &MBBSource : Entry.Blocks) {
+ MachineBasicBlock *MBB = nullptr;
+ if (parseMBBReference(PFS, MBB, MBBSource.Value))
+ return true;
+ Blocks.push_back(MBB);
+ }
+ unsigned Index = JTI->createJumpTableIndex(Blocks);
+ if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index))
+ .second)
+ return error(Entry.ID.SourceRange.Start,
+ Twine("redefinition of jump table entry '%jump-table.") +
+ Twine(Entry.ID.Value) + "'");
+ }
+ return false;
+}
+
+bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS,
+ MachineBasicBlock *&MBB,
+ const yaml::StringValue &Source) {
+ SMDiagnostic Error;
+ if (llvm::parseMBBReference(PFS, MBB, Source.Value, Error))
+ return error(Error, Source.SourceRange);
+ return false;
+}
+
+SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
+ assert(SourceRange.isValid() && "Invalid source range");
+ SMLoc Loc = SourceRange.Start;
+ bool HasQuote = Loc.getPointer() < SourceRange.End.getPointer() &&
+ *Loc.getPointer() == '\'';
+ // Translate the location of the error from the location in the MI string to
+ // the corresponding location in the MIR file.
+ Loc = Loc.getFromPointer(Loc.getPointer() + Error.getColumnNo() +
+ (HasQuote ? 1 : 0));
+
+ // TODO: Translate any source ranges as well.
+ return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None,
+ Error.getFixIts());
+}
+
+SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
+ SMRange SourceRange) {
+ assert(SourceRange.isValid());
+
+ // Translate the location of the error from the location in the LLVM IR
+ // string to the corresponding location in the MIR file.
+ auto LineAndColumn = SM.getLineAndColumn(SourceRange.Start);
+ unsigned Line = LineAndColumn.first + Error.getLineNo() - 1;
+ unsigned Column = Error.getColumnNo();
+ StringRef LineStr = Error.getLineContents();
+ SMLoc Loc = Error.getLoc();
+
+ // Get the full line and adjust the column number by taking the indentation of
+ // LLVM IR into account.
+ for (line_iterator L(*SM.getMemoryBuffer(SM.getMainFileID()), false), E;
+ L != E; ++L) {
+ if (L.line_number() == Line) {
+ LineStr = *L;
+ Loc = SMLoc::getFromPointer(LineStr.data());
+ auto Indent = LineStr.find(Error.getLineContents());
+ if (Indent != StringRef::npos)
+ Column += Indent;
+ break;
+ }
+ }
+
+ return SMDiagnostic(SM, Loc, Filename, Line, Column, Error.getKind(),
+ Error.getMessage(), LineStr, Error.getRanges(),
+ Error.getFixIts());
+}
+
+void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) {
+ if (!Names2RegClasses.empty())
+ return;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
+ const auto *RC = TRI->getRegClass(I);
+ Names2RegClasses.insert(
+ std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
+ }
+}
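+
+// Example (illustrative, assuming an x86 subtarget): a virtual register
+// declared in MIR as "- { id: 0, class: gr32 }" is resolved through this map
+// by its lower-cased class name:
+//
+//   const TargetRegisterClass *RC = getRegClass(MF, "gr32");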
+
+void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
+ if (!Names2RegBanks.empty())
+ return;
+ const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
+ // If the target does not support GlobalISel, we may not have a
+ // register bank info.
+ if (!RBI)
+ return;
+ for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
+ const auto &RegBank = RBI->getRegBank(I);
+ Names2RegBanks.insert(
+ std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
+ }
+}
+
+const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
+ StringRef Name) {
+ initNames2RegClasses(MF);
+ auto RegClassInfo = Names2RegClasses.find(Name);
+ if (RegClassInfo == Names2RegClasses.end())
+ return nullptr;
+ return RegClassInfo->getValue();
+}
+
+const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
+ StringRef Name) {
+ initNames2RegBanks(MF);
+ auto RegBankInfo = Names2RegBanks.find(Name);
+ if (RegBankInfo == Names2RegBanks.end())
+ return nullptr;
+ return RegBankInfo->getValue();
+}
+
+MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
+ : Impl(std::move(Impl)) {}
+
+MIRParser::~MIRParser() {}
+
+std::unique_ptr<Module> MIRParser::parseLLVMModule() { return Impl->parse(); }
+
+bool MIRParser::initializeMachineFunction(MachineFunction &MF) {
+ return Impl->initializeMachineFunction(MF);
+}
+
+std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename,
+ SMDiagnostic &Error,
+ LLVMContext &Context) {
+ auto FileOrErr = MemoryBuffer::getFile(Filename);
+ if (std::error_code EC = FileOrErr.getError()) {
+ Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
+ "Could not open input file: " + EC.message());
+ return nullptr;
+ }
+ return createMIRParser(std::move(FileOrErr.get()), Context);
+}
+
+std::unique_ptr<MIRParser>
+llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
+ LLVMContext &Context) {
+ auto Filename = Contents->getBufferIdentifier();
+ return llvm::make_unique<MIRParser>(
+ llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
+}
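+
+// End-to-end usage sketch (editorial example, not part of the original file);
+// error handling is elided and "input.mir" is a placeholder path:
+//
+//   LLVMContext Context;
+//   SMDiagnostic Error;
+//   std::unique_ptr<MIRParser> Parser =
+//       createMIRParserFromFile("input.mir", Error, Context);
+//   std::unique_ptr<Module> M = Parser->parseLLVMModule();
+//   // Later, once a MachineFunction MF exists for a function in M:
+//   bool Failed = Parser->initializeMachineFunction(MF);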
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
new file mode 100644
index 000000000000..703c99d9edd3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -0,0 +1,1005 @@
+//===- MIRPrinter.cpp - MIR serialization format printer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that prints out the LLVM IR and machine
+// functions using the MIR serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRPrinter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// This structure describes how to print out stack object references.
+struct FrameIndexOperand {
+ std::string Name;
+ unsigned ID;
+ bool IsFixed;
+
+ FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed)
+ : Name(Name.str()), ID(ID), IsFixed(IsFixed) {}
+
+ /// Return an ordinary stack object reference.
+ static FrameIndexOperand create(StringRef Name, unsigned ID) {
+ return FrameIndexOperand(Name, ID, /*IsFixed=*/false);
+ }
+
+ /// Return a fixed stack object reference.
+ static FrameIndexOperand createFixed(unsigned ID) {
+ return FrameIndexOperand("", ID, /*IsFixed=*/true);
+ }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+/// This class prints out the machine functions using the MIR serialization
+/// format.
+class MIRPrinter {
+ raw_ostream &OS;
+ DenseMap<const uint32_t *, unsigned> RegisterMaskIds;
+ /// Maps from stack object indices to operand indices which will be used when
+ /// printing frame index machine operands.
+ DenseMap<int, FrameIndexOperand> StackObjectOperandMapping;
+
+public:
+ MIRPrinter(raw_ostream &OS) : OS(OS) {}
+
+ void print(const MachineFunction &MF);
+
+ void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI);
+ void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI);
+ void convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool);
+ void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI);
+ void convertStackObjects(yaml::MachineFunction &MF,
+ const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
+ ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI);
+
+private:
+ void initRegisterMaskIds(const MachineFunction &MF);
+};
+
+/// This class prints out the machine instructions using the MIR serialization
+/// format.
+class MIPrinter {
+ raw_ostream &OS;
+ ModuleSlotTracker &MST;
+ const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
+
+public:
+ MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
+ const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds,
+ const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping)
+ : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds),
+ StackObjectOperandMapping(StackObjectOperandMapping) {}
+
+ void print(const MachineBasicBlock &MBB);
+
+ void print(const MachineInstr &MI);
+ void printMBBReference(const MachineBasicBlock &MBB);
+ void printIRBlockReference(const BasicBlock &BB);
+ void printIRValueReference(const Value &V);
+ void printStackObjectReference(int FrameIndex);
+ void printOffset(int64_t Offset);
+ void printTargetFlags(const MachineOperand &Op);
+ void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+ unsigned I, bool ShouldPrintRegisterTies,
+ const MachineRegisterInfo *MRI = nullptr, bool IsDef = false);
+ void print(const MachineMemOperand &Op);
+
+ void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
+};
+
+} // end namespace llvm
+
+namespace llvm {
+namespace yaml {
+
+/// This struct serializes the LLVM IR module.
+template <> struct BlockScalarTraits<Module> {
+ static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) {
+ Mod.print(OS, nullptr);
+ }
+ static StringRef input(StringRef Str, void *Ctxt, Module &Mod) {
+ llvm_unreachable("LLVM Module is supposed to be parsed separately");
+ return "";
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+static void printReg(unsigned Reg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ // TODO: Print Stack Slots.
+ if (!Reg)
+ OS << '_';
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (Reg < TRI->getNumRegs())
+ OS << '%' << StringRef(TRI->getName(Reg)).lower();
+ else
+ llvm_unreachable("Can't print this kind of register yet");
+}
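+
+// Sample outputs of the helper above (physical register names assume an x86
+// subtarget and are lower-cased, as in MIR syntax):
+//   no register        -> "_"
+//   virtual registers  -> "%0", "%1", ...
+//   physical registers -> "%eax", "%rsp", ...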
+
+static void printReg(unsigned Reg, yaml::StringValue &Dest,
+ const TargetRegisterInfo *TRI) {
+ raw_string_ostream OS(Dest.Value);
+ printReg(Reg, OS, TRI);
+}
+
+void MIRPrinter::print(const MachineFunction &MF) {
+ initRegisterMaskIds(MF);
+
+ yaml::MachineFunction YamlMF;
+ YamlMF.Name = MF.getName();
+ YamlMF.Alignment = MF.getAlignment();
+ YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.HasInlineAsm = MF.hasInlineAsm();
+ YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+
+ convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
+ ModuleSlotTracker MST(MF.getFunction()->getParent());
+ MST.incorporateFunction(*MF.getFunction());
+ convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
+ convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
+ MF.getSubtarget().getRegisterInfo());
+ if (const auto *ConstantPool = MF.getConstantPool())
+ convert(YamlMF, *ConstantPool);
+ if (const auto *JumpTableInfo = MF.getJumpTableInfo())
+ convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+ raw_string_ostream StrOS(YamlMF.Body.Value.Value);
+ bool IsNewlineNeeded = false;
+ for (const auto &MBB : MF) {
+ if (IsNewlineNeeded)
+ StrOS << "\n";
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .print(MBB);
+ IsNewlineNeeded = true;
+ }
+ StrOS.flush();
+ yaml::Output Out(OS);
+ Out << YamlMF;
+}
+
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+ const MachineRegisterInfo &RegInfo,
+ const TargetRegisterInfo *TRI) {
+ MF.IsSSA = RegInfo.isSSA();
+ MF.TracksRegLiveness = RegInfo.tracksLiveness();
+ MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled();
+
+ // Print the virtual register definitions.
+ for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ yaml::VirtualRegisterDefinition VReg;
+ VReg.ID = I;
+ if (RegInfo.getRegClassOrNull(Reg))
+ VReg.Class =
+ StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+ else if (RegInfo.getRegBankOrNull(Reg))
+ VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
+ else {
+ VReg.Class = std::string("_");
+ assert(RegInfo.getSize(Reg) && "Generic registers must have a size");
+ }
+ unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
+ if (PreferredReg)
+ printReg(PreferredReg, VReg.PreferredRegister, TRI);
+ MF.VirtualRegisters.push_back(VReg);
+ }
+
+ // Print the live ins.
+ for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+ yaml::MachineFunctionLiveIn LiveIn;
+ printReg(I->first, LiveIn.Register, TRI);
+ if (I->second)
+ printReg(I->second, LiveIn.VirtualRegister, TRI);
+ MF.LiveIns.push_back(LiveIn);
+ }
+ // The used physical register mask is printed as an inverted callee saved
+ // register mask.
+ const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
+ if (UsedPhysRegMask.none())
+ return;
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
+ if (!UsedPhysRegMask[I]) {
+ yaml::FlowStringValue Reg;
+ printReg(I, Reg, TRI);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ }
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineFrameInfo &YamlMFI,
+ const MachineFrameInfo &MFI) {
+ YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
+ YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
+ YamlMFI.HasStackMap = MFI.hasStackMap();
+ YamlMFI.HasPatchPoint = MFI.hasPatchPoint();
+ YamlMFI.StackSize = MFI.getStackSize();
+ YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment();
+ YamlMFI.MaxAlignment = MFI.getMaxAlignment();
+ YamlMFI.AdjustsStack = MFI.adjustsStack();
+ YamlMFI.HasCalls = MFI.hasCalls();
+ YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize();
+ YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
+ YamlMFI.HasVAStart = MFI.hasVAStart();
+ YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+ if (MFI.getSavePoint()) {
+ raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MFI.getSavePoint());
+ }
+ if (MFI.getRestorePoint()) {
+ raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MFI.getRestorePoint());
+ }
+}
+
+void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
+ const MachineFrameInfo &MFI,
+ MachineModuleInfo &MMI,
+ ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI) {
+ // Process fixed stack objects.
+ unsigned ID = 0;
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ yaml::FixedMachineStackObject YamlObject;
+ YamlObject.ID = ID;
+ YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
+ ? yaml::FixedMachineStackObject::SpillSlot
+ : yaml::FixedMachineStackObject::DefaultType;
+ YamlObject.Offset = MFI.getObjectOffset(I);
+ YamlObject.Size = MFI.getObjectSize(I);
+ YamlObject.Alignment = MFI.getObjectAlignment(I);
+ YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
+ YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
+ MF.FixedStackObjects.push_back(YamlObject);
+ StackObjectOperandMapping.insert(
+ std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
+ }
+
+ // Process ordinary stack objects.
+ ID = 0;
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) {
+ if (MFI.isDeadObjectIndex(I))
+ continue;
+
+ yaml::MachineStackObject YamlObject;
+ YamlObject.ID = ID;
+ if (const auto *Alloca = MFI.getObjectAllocation(I))
+ YamlObject.Name.Value =
+ Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
+ YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
+ ? yaml::MachineStackObject::SpillSlot
+ : MFI.isVariableSizedObjectIndex(I)
+ ? yaml::MachineStackObject::VariableSized
+ : yaml::MachineStackObject::DefaultType;
+ YamlObject.Offset = MFI.getObjectOffset(I);
+ YamlObject.Size = MFI.getObjectSize(I);
+ YamlObject.Alignment = MFI.getObjectAlignment(I);
+
+ MF.StackObjects.push_back(YamlObject);
+ StackObjectOperandMapping.insert(std::make_pair(
+ I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+ }
+
+ for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+ yaml::StringValue Reg;
+ printReg(CSInfo.getReg(), Reg, TRI);
+ auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ if (StackObject.IsFixed)
+ MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ else
+ MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ }
+ for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
+ auto LocalObject = MFI.getLocalFrameObjectMap(I);
+ auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
+ MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ }
+
+ // Print the stack object references in the frame information class after
+ // converting the stack objects.
+ if (MFI.hasStackProtectorIndex()) {
+ raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getStackProtectorIndex());
+ }
+
+ // Print the debug variable information.
+ for (MachineModuleInfo::VariableDbgInfo &DebugVar :
+ MMI.getVariableDbgInfo()) {
+ auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
+ assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid stack object index");
+ const FrameIndexOperand &StackObject = StackObjectInfo->second;
+ assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
+ auto &Object = MF.StackObjects[StackObject.ID];
+ {
+ raw_string_ostream StrOS(Object.DebugVar.Value);
+ DebugVar.Var->printAsOperand(StrOS, MST);
+ }
+ {
+ raw_string_ostream StrOS(Object.DebugExpr.Value);
+ DebugVar.Expr->printAsOperand(StrOS, MST);
+ }
+ {
+ raw_string_ostream StrOS(Object.DebugLoc.Value);
+ DebugVar.Loc->printAsOperand(StrOS, MST);
+ }
+ }
+}
+
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+ const MachineConstantPool &ConstantPool) {
+ unsigned ID = 0;
+ for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
+ // TODO: Serialize target specific constant pool entries.
+ if (Constant.isMachineConstantPoolEntry())
+ llvm_unreachable("Can't print target specific constant pool entries yet");
+
+ yaml::MachineConstantPoolValue YamlConstant;
+ std::string Str;
+ raw_string_ostream StrOS(Str);
+ Constant.Val.ConstVal->printAsOperand(StrOS);
+ YamlConstant.ID = ID++;
+ YamlConstant.Value = StrOS.str();
+ YamlConstant.Alignment = Constant.getAlignment();
+ MF.Constants.push_back(YamlConstant);
+ }
+}
+
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+ yaml::MachineJumpTable &YamlJTI,
+ const MachineJumpTableInfo &JTI) {
+ YamlJTI.Kind = JTI.getEntryKind();
+ unsigned ID = 0;
+ for (const auto &Table : JTI.getJumpTables()) {
+ std::string Str;
+ yaml::MachineJumpTable::Entry Entry;
+ Entry.ID = ID++;
+ for (const auto *MBB : Table.MBBs) {
+ raw_string_ostream StrOS(Str);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printMBBReference(*MBB);
+ Entry.Blocks.push_back(StrOS.str());
+ Str.clear();
+ }
+ YamlJTI.Entries.push_back(Entry);
+ }
+}
+
+void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned I = 0;
+ for (const uint32_t *Mask : TRI->getRegMasks())
+ RegisterMaskIds.insert(std::make_pair(Mask, I++));
+}
+
+void MIPrinter::print(const MachineBasicBlock &MBB) {
+ assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+ OS << "bb." << MBB.getNumber();
+ bool HasAttributes = false;
+ if (const auto *BB = MBB.getBasicBlock()) {
+ if (BB->hasName()) {
+ OS << "." << BB->getName();
+ } else {
+ HasAttributes = true;
+ OS << " (";
+ int Slot = MST.getLocalSlot(BB);
+ if (Slot == -1)
+ OS << "<ir-block badref>";
+ else
+ OS << (Twine("%ir-block.") + Twine(Slot)).str();
+ }
+ }
+ if (MBB.hasAddressTaken()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "address-taken";
+ HasAttributes = true;
+ }
+ if (MBB.isEHPad()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "landing-pad";
+ HasAttributes = true;
+ }
+ if (MBB.getAlignment()) {
+ OS << (HasAttributes ? ", " : " (");
+ OS << "align " << MBB.getAlignment();
+ HasAttributes = true;
+ }
+ if (HasAttributes)
+ OS << ")";
+ OS << ":\n";
+
+ bool HasLineAttributes = false;
+ // Print the successors.
+ if (!MBB.succ_empty()) {
+ OS.indent(2) << "successors: ";
+ for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
+ if (I != MBB.succ_begin())
+ OS << ", ";
+ printMBBReference(**I);
+ if (MBB.hasSuccessorProbabilities())
+ OS << '(' << MBB.getSuccProbability(I) << ')';
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ // Print the live in registers.
+ const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ if (!MBB.livein_empty()) {
+ OS.indent(2) << "liveins: ";
+ bool First = true;
+ for (const auto &LI : MBB.liveins()) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ printReg(LI.PhysReg, OS, TRI);
+ if (LI.LaneMask != ~0u)
+ OS << ':' << PrintLaneMask(LI.LaneMask);
+ }
+ OS << "\n";
+ HasLineAttributes = true;
+ }
+
+ if (HasLineAttributes)
+ OS << "\n";
+ bool IsInBundle = false;
+ for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+ const MachineInstr &MI = *I;
+ if (IsInBundle && !MI.isInsideBundle()) {
+ OS.indent(2) << "}\n";
+ IsInBundle = false;
+ }
+ OS.indent(IsInBundle ? 4 : 2);
+ print(MI);
+ if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+ OS << " {";
+ IsInBundle = true;
+ }
+ OS << "\n";
+ }
+ if (IsInBundle)
+ OS.indent(2) << "}\n";
+}
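+
+// A printed block produced by the code above looks roughly like this
+// (illustrative; register names assume an x86 subtarget, and successor
+// probabilities appear only when they are available):
+//
+//   bb.1.if.then:
+//     successors: %bb.2, %bb.3
+//     liveins: %edi
+//
+//     <machine instructions>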
+
+/// Return true when an instruction has a tied register that can't be
+/// determined by the instruction's descriptor.
+static bool hasComplexRegisterTies(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const auto &Operand = MI.getOperand(I);
+ if (!Operand.isReg() || Operand.isDef())
+ // Ignore the defined registers as MCID marks only the uses as tied.
+ continue;
+ int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+ int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
+ if (ExpectedTiedIdx != TiedIdx)
+ return true;
+ }
+ return false;
+}
+
+void MIPrinter::print(const MachineInstr &MI) {
+ const auto *MF = MI.getParent()->getParent();
+ const auto &MRI = MF->getRegInfo();
+ const auto &SubTarget = MF->getSubtarget();
+ const auto *TRI = SubTarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ const auto *TII = SubTarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ if (MI.isCFIInstruction())
+ assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+
+ bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
+ unsigned I = 0, E = MI.getNumOperands();
+ for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
+ !MI.getOperand(I).isImplicit();
+ ++I) {
+ if (I)
+ OS << ", ";
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI,
+ /*IsDef=*/true);
+ }
+
+ if (I)
+ OS << " = ";
+ if (MI.getFlag(MachineInstr::FrameSetup))
+ OS << "frame-setup ";
+ OS << TII->getName(MI.getOpcode());
+ if (isPreISelGenericOpcode(MI.getOpcode())) {
+ assert(MI.getType() && "Generic instructions must have a type");
+ OS << ' ';
+ MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
+ }
+ if (I < E)
+ OS << ' ';
+
+ bool NeedComma = false;
+ for (; I < E; ++I) {
+ if (NeedComma)
+ OS << ", ";
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
+ NeedComma = true;
+ }
+
+ if (MI.getDebugLoc()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-location ";
+ MI.getDebugLoc()->printAsOperand(OS, MST);
+ }
+
+ if (!MI.memoperands_empty()) {
+ OS << " :: ";
+ bool NeedComma = false;
+ for (const auto *Op : MI.memoperands()) {
+ if (NeedComma)
+ OS << ", ";
+ print(*Op);
+ NeedComma = true;
+ }
+ }
+}
+
+void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
+ OS << "%bb." << MBB.getNumber();
+ if (const auto *BB = MBB.getBasicBlock()) {
+ if (BB->hasName())
+ OS << '.' << BB->getName();
+ }
+}
+
+static void printIRSlotNumber(raw_ostream &OS, int Slot) {
+ if (Slot == -1)
+ OS << "<badref>";
+ else
+ OS << Slot;
+}
+
+void MIPrinter::printIRBlockReference(const BasicBlock &BB) {
+ OS << "%ir-block.";
+ if (BB.hasName()) {
+ printLLVMNameWithoutPrefix(OS, BB.getName());
+ return;
+ }
+ const Function *F = BB.getParent();
+ int Slot;
+ if (F == MST.getCurrentFunction()) {
+ Slot = MST.getLocalSlot(&BB);
+ } else {
+ ModuleSlotTracker CustomMST(F->getParent(),
+ /*ShouldInitializeAllMetadata=*/false);
+ CustomMST.incorporateFunction(*F);
+ Slot = CustomMST.getLocalSlot(&BB);
+ }
+ printIRSlotNumber(OS, Slot);
+}
+
+void MIPrinter::printIRValueReference(const Value &V) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ printIRSlotNumber(OS, MST.getLocalSlot(&V));
+}
+
+void MIPrinter::printStackObjectReference(int FrameIndex) {
+ auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
+ assert(ObjectInfo != StackObjectOperandMapping.end() &&
+ "Invalid frame index");
+ const FrameIndexOperand &Operand = ObjectInfo->second;
+ if (Operand.IsFixed) {
+ OS << "%fixed-stack." << Operand.ID;
+ return;
+ }
+ OS << "%stack." << Operand.ID;
+ if (!Operand.Name.empty())
+ OS << '.' << Operand.Name;
+}
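+
+// Example results of the mapping above: a fixed object prints as
+// "%fixed-stack.0", while an ordinary object prints as "%stack.0" or, when it
+// has a name, "%stack.0.buf" (the name "buf" is illustrative).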
+
+void MIPrinter::printOffset(int64_t Offset) {
+ if (Offset == 0)
+ return;
+ if (Offset < 0) {
+ OS << " - " << -Offset;
+ return;
+ }
+ OS << " + " << Offset;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags) {
+ if (I.first == TF) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::printTargetFlags(const MachineOperand &Op) {
+ if (!Op.getTargetFlags())
+ return;
+ const auto *TII =
+ Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+ OS << "target-flags(";
+ const bool HasDirectFlags = Flags.first;
+ const bool HasBitmaskFlags = Flags.second;
+ if (!HasDirectFlags && !HasBitmaskFlags) {
+ OS << "<unknown>) ";
+ return;
+ }
+ if (HasDirectFlags) {
+ if (const auto *Name = getTargetFlagName(TII, Flags.first))
+ OS << Name;
+ else
+ OS << "<unknown target flag>";
+ }
+ if (!HasBitmaskFlags) {
+ OS << ") ";
+ return;
+ }
+ bool IsCommaNeeded = HasDirectFlags;
+ unsigned BitMask = Flags.second;
+ auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &Mask : BitMasks) {
+ // Check if the flag's bitmask has the bits of the current mask set.
+ if ((BitMask & Mask.first) == Mask.first) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ IsCommaNeeded = true;
+ OS << Mask.second;
+ // Clear the bits which were serialized from the flag's bitmask.
+ BitMask &= ~(Mask.first);
+ }
+ }
+ if (BitMask) {
+ // When the resulting flag's bitmask isn't zero, we know that we didn't
+ // serialize all of the bit flags.
+ if (IsCommaNeeded)
+ OS << ", ";
+ OS << "<unknown bitmask target flag>";
+ }
+ OS << ") ";
+}
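+
+// The resulting operand prefix looks like "target-flags(<name>) " for a direct
+// flag, "target-flags(<name>, <mask-name>) " when bitmask flags are also set,
+// and "target-flags(<unknown>) " when the target provides no serializable
+// names (the bracketed names are placeholders; real names are target
+// specific).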
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices) {
+ if (I.first == Index) {
+ return I.second;
+ }
+ }
+ return nullptr;
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+ unsigned I, bool ShouldPrintRegisterTies,
+ const MachineRegisterInfo *MRI, bool IsDef) {
+ printTargetFlags(Op);
+ switch (Op.getType()) {
+ case MachineOperand::MO_Register:
+ if (Op.isImplicit())
+ OS << (Op.isDef() ? "implicit-def " : "implicit ");
+ else if (!IsDef && Op.isDef())
+ // Print the 'def' flag only when the operand is defined after '='.
+ OS << "def ";
+ if (Op.isInternalRead())
+ OS << "internal ";
+ if (Op.isDead())
+ OS << "dead ";
+ if (Op.isKill())
+ OS << "killed ";
+ if (Op.isUndef())
+ OS << "undef ";
+ if (Op.isEarlyClobber())
+ OS << "early-clobber ";
+ if (Op.isDebug())
+ OS << "debug-use ";
+ printReg(Op.getReg(), OS, TRI);
+ // Print the sub register.
+ if (Op.getSubReg() != 0)
+ OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+ if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
+ OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
+ assert((!IsDef || MRI) && "for IsDef, MRI must be provided");
+ if (IsDef && MRI->getSize(Op.getReg()))
+ OS << '(' << MRI->getSize(Op.getReg()) << ')';
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << Op.getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ printMBBReference(*Op.getMBB());
+ break;
+ case MachineOperand::MO_FrameIndex:
+ printStackObjectReference(Op.getIndex());
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "%const." << Op.getIndex();
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_TargetIndex: {
+ OS << "target-index(";
+ if (const auto *Name = getTargetIndexName(
+ *Op.getParent()->getParent()->getParent(), Op.getIndex()))
+ OS << Name;
+ else
+ OS << "<unknown>";
+ OS << ')';
+ printOffset(Op.getOffset());
+ break;
+ }
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "%jump-table." << Op.getIndex();
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << '$';
+ printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << "blockaddress(";
+ Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+ MST);
+ OS << ", ";
+ printIRBlockReference(*Op.getBlockAddress()->getBasicBlock());
+ OS << ')';
+ printOffset(Op.getOffset());
+ break;
+ case MachineOperand::MO_RegisterMask: {
+ auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
+ if (RegMaskInfo != RegisterMaskIds.end())
+ OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower();
+ else
+ llvm_unreachable("Can't print this machine register mask yet.");
+ break;
+ }
+ case MachineOperand::MO_RegisterLiveOut: {
+ const uint32_t *RegMask = Op.getRegLiveOut();
+ OS << "liveout(";
+ bool IsCommaNeeded = false;
+ for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+ if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+ if (IsCommaNeeded)
+ OS << ", ";
+ printReg(Reg, OS, TRI);
+ IsCommaNeeded = true;
+ }
+ }
+ OS << ")";
+ break;
+ }
+ case MachineOperand::MO_Metadata:
+ Op.getMetadata()->printAsOperand(OS, MST);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
+ break;
+ case MachineOperand::MO_CFIIndex: {
+ const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
+ print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ break;
+ }
+ }
+}
+
+void MIPrinter::print(const MachineMemOperand &Op) {
+ OS << '(';
+ // TODO: Print the operand's target-specific flags.
+ if (Op.isVolatile())
+ OS << "volatile ";
+ if (Op.isNonTemporal())
+ OS << "non-temporal ";
+ if (Op.isInvariant())
+ OS << "invariant ";
+ if (Op.isLoad())
+ OS << "load ";
+ else {
+ assert(Op.isStore() && "Non load machine operand must be a store");
+ OS << "store ";
+ }
+ OS << Op.getSize();
+ if (const Value *Val = Op.getValue()) {
+ OS << (Op.isLoad() ? " from " : " into ");
+ printIRValueReference(*Val);
+ } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) {
+ OS << (Op.isLoad() ? " from " : " into ");
+ assert(PVal && "Expected a pseudo source value");
+ switch (PVal->kind()) {
+ case PseudoSourceValue::Stack:
+ OS << "stack";
+ break;
+ case PseudoSourceValue::GOT:
+ OS << "got";
+ break;
+ case PseudoSourceValue::JumpTable:
+ OS << "jump-table";
+ break;
+ case PseudoSourceValue::ConstantPool:
+ OS << "constant-pool";
+ break;
+ case PseudoSourceValue::FixedStack:
+ printStackObjectReference(
+ cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex());
+ break;
+ case PseudoSourceValue::GlobalValueCallEntry:
+ OS << "call-entry ";
+ cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+ OS, /*PrintType=*/false, MST);
+ break;
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ OS << "call-entry $";
+ printLLVMNameWithoutPrefix(
+ OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+ break;
+ }
+ }
+ printOffset(Op.getOffset());
+ if (Op.getBaseAlignment() != Op.getSize())
+ OS << ", align " << Op.getBaseAlignment();
+ auto AAInfo = Op.getAAInfo();
+ if (AAInfo.TBAA) {
+ OS << ", !tbaa ";
+ AAInfo.TBAA->printAsOperand(OS, MST);
+ }
+ if (AAInfo.Scope) {
+ OS << ", !alias.scope ";
+ AAInfo.Scope->printAsOperand(OS, MST);
+ }
+ if (AAInfo.NoAlias) {
+ OS << ", !noalias ";
+ AAInfo.NoAlias->printAsOperand(OS, MST);
+ }
+ if (Op.getRanges()) {
+ OS << ", !range ";
+ Op.getRanges()->printAsOperand(OS, MST);
+ }
+ OS << ')';
+}
+
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ int Reg = TRI->getLLVMRegNum(DwarfReg, true);
+ if (Reg == -1) {
+ OS << "<badreg>";
+ return;
+ }
+ printReg(Reg, OS, TRI);
+}
+
+void MIPrinter::print(const MCCFIInstruction &CFI,
+ const TargetRegisterInfo *TRI) {
+ switch (CFI.getOperation()) {
+ case MCCFIInstruction::OpSameValue:
+ OS << ".cfi_same_value ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpOffset:
+ OS << ".cfi_offset ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister:
+ OS << ".cfi_def_cfa_register ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset:
+ OS << ".cfi_def_cfa_offset ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ OS << CFI.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfa:
+ OS << ".cfi_def_cfa ";
+ if (CFI.getLabel())
+ OS << "<mcsymbol> ";
+ printCFIRegister(CFI.getRegister(), OS, TRI);
+ OS << ", " << CFI.getOffset();
+ break;
+ default:
+ // TODO: Print the other CFI Operations.
+ OS << "<unserializable cfi operation>";
+ break;
+ }
+}
+
+void llvm::printMIR(raw_ostream &OS, const Module &M) {
+ yaml::Output Out(OS);
+ Out << const_cast<Module &>(M);
+}
+
+void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) {
+ MIRPrinter Printer(OS);
+ Printer.print(MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.h b/contrib/llvm/lib/CodeGen/MIRPrinter.h
new file mode 100644
index 000000000000..16aa9038b6b2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.h
@@ -0,0 +1,33 @@
+//===- MIRPrinter.h - MIR serialization format printer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that print out the LLVM IR and the machine
+// functions using the MIR serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRPRINTER_H
+#define LLVM_LIB_CODEGEN_MIRPRINTER_H
+
+namespace llvm {
+
+class MachineFunction;
+class Module;
+class raw_ostream;
+
+/// Print LLVM IR using the MIR serialization format to the given output stream.
+void printMIR(raw_ostream &OS, const Module &M);
+
+/// Print a machine function using the MIR serialization format to the given
+/// output stream.
+void printMIR(raw_ostream &OS, const MachineFunction &MF);
+
+} // end namespace llvm
+
+#endif
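As context for the header above, a minimal sketch of how the two entry points might be called from other in-tree CodeGen code; the helper function and its caller are hypothetical, and only the two printMIR overloads come from this patch.

// Illustrative sketch only; dumpAsMIR is a hypothetical helper.
#include "MIRPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

static void dumpAsMIR(const llvm::Module &M, const llvm::MachineFunction &MF) {
  // First YAML document: the LLVM IR of the module.
  llvm::printMIR(llvm::outs(), M);
  // Following document: the serialized state of one machine function.
  llvm::printMIR(llvm::outs(), MF);
}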
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
new file mode 100644
index 000000000000..8e7566a4e46b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -0,0 +1,71 @@
+//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints out the LLVM module using the MIR
+// serialization format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRPrinter.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MIRYamlMapping.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+/// This pass prints out the LLVM IR to an output stream using the MIR
+/// serialization format.
+struct MIRPrintingPass : public MachineFunctionPass {
+ static char ID;
+ raw_ostream &OS;
+ std::string MachineFunctions;
+
+ MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {}
+ MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {}
+
+ const char *getPassName() const override { return "MIR Printing Pass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ std::string Str;
+ raw_string_ostream StrOS(Str);
+ printMIR(StrOS, MF);
+ MachineFunctions.append(StrOS.str());
+ return false;
+ }
+
+ bool doFinalization(Module &M) override {
+ printMIR(OS, M);
+ OS << MachineFunctions;
+ return false;
+ }
+};
+
+char MIRPrintingPass::ID = 0;
+
+} // end anonymous namespace
+
+char &llvm::MIRPrintingPassID = MIRPrintingPass::ID;
+INITIALIZE_PASS(MIRPrintingPass, "mir-printer", "MIR Printer", false, false)
+
+namespace llvm {
+
+MachineFunctionPass *createPrintMIRPass(raw_ostream &OS) {
+ return new MIRPrintingPass(OS);
+}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
new file mode 100644
index 000000000000..689dd0764ce0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -0,0 +1,1289 @@
+//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "codegen"
+
+MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
+ : BB(B), Number(-1), xParent(&MF) {
+ Insts.Parent = this;
+}
+
+MachineBasicBlock::~MachineBasicBlock() {
+}
+
+/// Return the MCSymbol for this basic block.
+MCSymbol *MachineBasicBlock::getSymbol() const {
+ if (!CachedMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
+ CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+
+ return CachedMCSymbol;
+}
+
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
+ MBB.print(OS);
+ return OS;
+}
+
+/// When an MBB is added to an MF, we need to update the parent pointer of the
+/// MBB, the MBB numbering, and ensure that the instructions in the MBB have
+/// their register operands on the correct use/def lists.
+///
+/// MBBs start out as #-1. When an MBB is added to a MachineFunction, it
+/// gets the next available unique MBB number. If it is removed from a
+/// MachineFunction, it goes back to being #-1.
+void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+ MachineFunction &MF = *N->getParent();
+ N->Number = MF.addToMBBNumbering(N);
+
+ // Make sure the instructions have their operands in the reginfo lists.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
+ I->AddRegOperandsToUseLists(RegInfo);
+}
+
+void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+ N->getParent()->removeFromMBBNumbering(N->Number);
+ N->Number = -1;
+}
+
+/// When we add an instruction to a basic block list, we update its parent
+/// pointer and add its operands from reg use/def lists if appropriate.
+void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
+ assert(!N->getParent() && "machine instruction already in a basic block");
+ N->setParent(Parent);
+
+ // Add the instruction's register operands to their corresponding
+ // use/def lists.
+ MachineFunction *MF = Parent->getParent();
+ N->AddRegOperandsToUseLists(MF->getRegInfo());
+}
+
+/// When we remove an instruction from a basic block list, we update its parent
+/// pointer and remove its operands from reg use/def lists if appropriate.
+void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
+ assert(N->getParent() && "machine instruction not in a basic block");
+
+ // Remove from the use/def lists.
+ if (MachineFunction *MF = N->getParent()->getParent())
+ N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+
+ N->setParent(nullptr);
+}
+
+/// When moving a range of instructions from one MBB list to another, we need to
+/// update the parent pointers and the use/def lists.
+void ilist_traits<MachineInstr>::
+transferNodesFromList(ilist_traits<MachineInstr> &FromList,
+ ilist_iterator<MachineInstr> First,
+ ilist_iterator<MachineInstr> Last) {
+ assert(Parent->getParent() == FromList.Parent->getParent() &&
+ "MachineInstr parent mismatch!");
+
+ // Splice within the same MBB -> no change.
+ if (Parent == FromList.Parent) return;
+
+ // If splicing between two blocks within the same function, just update the
+ // parent pointers.
+ for (; First != Last; ++First)
+ First->setParent(Parent);
+}
+
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+ assert(!MI->getParent() && "MI is still in a block!");
+ Parent->getParent()->DeleteMachineInstr(MI);
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
+ ++I;
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi MI cannot be inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() {
+ // Skip over begin-of-block dbg_value instructions.
+ iterator I = begin(), E = end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+bool MachineBasicBlock::hasEHPadSuccessor() const {
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isEHPad())
+ return true;
+ return false;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineBasicBlock::getName() const {
+ if (const BasicBlock *LBB = getBasicBlock())
+ return LBB->getName();
+ else
+ return "(null)";
+}
+
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += ("BB" + Twine(getNumber())).str();
+ return Name;
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes)
+ const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+ const Function *F = MF->getFunction();
+ const Module *M = F ? F->getParent() : nullptr;
+ ModuleSlotTracker MST(M);
+ print(OS, MST, Indexes);
+}
+
+void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ const SlotIndexes *Indexes) const {
+ const MachineFunction *MF = getParent();
+ if (!MF) {
+ OS << "Can't print out MachineBasicBlock because parent MachineFunction"
+ << " is null\n";
+ return;
+ }
+
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
+ OS << "BB#" << getNumber() << ": ";
+
+ const char *Comma = "";
+ if (const BasicBlock *LBB = getBasicBlock()) {
+ OS << Comma << "derived from LLVM BB ";
+ LBB->printAsOperand(OS, /*PrintType=*/false, MST);
+ Comma = ", ";
+ }
+ if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+ if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment)
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+
+ OS << '\n';
+
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (!livein_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Live Ins:";
+ for (const auto &LI : make_range(livein_begin(), livein_end())) {
+ OS << ' ' << PrintReg(LI.PhysReg, TRI);
+ if (LI.LaneMask != ~0u)
+ OS << ':' << PrintLaneMask(LI.LaneMask);
+ }
+ OS << '\n';
+ }
+ // Print the preds of this block according to the CFG.
+ if (!pred_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Predecessors according to CFG:";
+ for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
+ OS << " BB#" << (*PI)->getNumber();
+ OS << '\n';
+ }
+
+ for (auto &I : instrs()) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
+ OS << '\t';
+ if (I.isInsideBundle())
+ OS << " * ";
+ I.print(OS, MST);
+ }
+
+ // Print the successors of this block according to the CFG.
+ if (!succ_empty()) {
+ if (Indexes) OS << '\t';
+ OS << " Successors according to CFG:";
+ for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
+ OS << " BB#" << (*SI)->getNumber();
+ if (!Probs.empty())
+ OS << '(' << *getProbabilityIterator(SI) << ')';
+ }
+ OS << '\n';
+ }
+}
+
+void MachineBasicBlock::printAsOperand(raw_ostream &OS,
+ bool /*PrintType*/) const {
+ OS << "BB#" << getNumber();
+}
+
+void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
+ LiveInVector::iterator I = std::find_if(
+ LiveIns.begin(), LiveIns.end(),
+ [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ if (I == LiveIns.end())
+ return;
+
+ I->LaneMask &= ~LaneMask;
+ if (I->LaneMask == 0)
+ LiveIns.erase(I);
+}
+
+bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
+ livein_iterator I = std::find_if(
+ LiveIns.begin(), LiveIns.end(),
+ [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+}
+
+void MachineBasicBlock::sortUniqueLiveIns() {
+ std::sort(LiveIns.begin(), LiveIns.end(),
+ [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+ return LI0.PhysReg < LI1.PhysReg;
+ });
+ // Live-ins are now sorted by physreg; merge their lane masks.
+ LiveInVector::const_iterator I = LiveIns.begin();
+ LiveInVector::const_iterator J;
+ LiveInVector::iterator Out = LiveIns.begin();
+ for (; I != LiveIns.end(); ++Out, I = J) {
+ unsigned PhysReg = I->PhysReg;
+ LaneBitmask LaneMask = I->LaneMask;
+ for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
+ LaneMask |= J->LaneMask;
+ Out->PhysReg = PhysReg;
+ Out->LaneMask = LaneMask;
+ }
+ LiveIns.erase(Out, LiveIns.end());
+}
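A small worked example of the sort-and-merge above; the register names and lane masks are made up for illustration and are not taken from this patch.

// Hypothetical input:  LiveIns = {(R1, 0x2), (R0, 0x1), (R0, 0x4)}
// After std::sort:     {(R0, 0x1), (R0, 0x4), (R1, 0x2)}
// After the merge loop (duplicate physregs OR their lane masks):
//                      {(R0, 0x5), (R1, 0x2)}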
+
+unsigned
+MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
+ assert(getParent() && "MBB must be inserted in function");
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
+ assert(RC && "Register class is required");
+ assert((isEHPad() || this == &getParent()->front()) &&
+ "Only the entry block and landing pads can have physreg live ins");
+
+ bool LiveIn = isLiveIn(PhysReg);
+ iterator I = SkipPHIsAndLabels(begin()), E = end();
+ MachineRegisterInfo &MRI = getParent()->getRegInfo();
+ const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo();
+
+ // Look for an existing copy.
+ if (LiveIn)
+ for (;I != E && I->isCopy(); ++I)
+ if (I->getOperand(1).getReg() == PhysReg) {
+ unsigned VirtReg = I->getOperand(0).getReg();
+ if (!MRI.constrainRegClass(VirtReg, RC))
+ llvm_unreachable("Incompatible live-in register class.");
+ return VirtReg;
+ }
+
+ // No luck, create a virtual register.
+ unsigned VirtReg = MRI.createVirtualRegister(RC);
+ BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg)
+ .addReg(PhysReg, RegState::Kill);
+ if (!LiveIn)
+ addLiveIn(PhysReg);
+ return VirtReg;
+}
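A hedged sketch of how a backend's argument lowering might use the helper above; PhysArgReg and RC are placeholder names, and the surrounding calling-convention logic is omitted.

// Illustrative sketch; not part of this patch.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetRegisterInfo.h"

static unsigned copyIncomingArg(llvm::MachineFunction &MF, unsigned PhysArgReg,
                                const llvm::TargetRegisterClass *RC) {
  // Reuses an existing COPY from PhysArgReg if the entry block already has
  // one; otherwise emits "%vreg = COPY PhysArgReg" and records the live-in.
  return MF.front().addLiveIn(PhysArgReg, RC);
}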
+
+void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
+ getParent()->splice(NewAfter->getIterator(), getIterator());
+}
+
+void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
+ getParent()->splice(++NewBefore->getIterator(), getIterator());
+}
+
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty())
+ return;
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ DebugLoc DL; // FIXME: this is nowhere
+ bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now its
+ // layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not its
+ // layout successor, insert a branch. First we have to locate the only
+ // non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isEHPad())
+ continue;
+ assert(!TBB && "Found more than one non-landing-pad successor!");
+ TBB = *SI;
+ }
+
+ // If there is no non-landing-pad successor, the block has no fall-through
+ // edges to be concerned with.
+ if (!TBB)
+ return;
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ }
+ return;
+ }
+
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond))
+ return;
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ }
+ return;
+ }
+
+ // Walk through the successors and find the successor which is not a landing
+ // pad and is not the conditional branch destination (in TBB) as the
+ // fallthrough successor.
+ MachineBasicBlock *FallthroughBB = nullptr;
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isEHPad() || *SI == TBB)
+ continue;
+ assert(!FallthroughBB && "Found more than one fallthrough successor.");
+ FallthroughBB = *SI;
+ }
+
+ if (!FallthroughBB) {
+ if (canFallThrough()) {
+ // We fall through to the same basic block that the conditional jump targets.
+ // Remove the conditional jump, leaving unconditional fallthrough.
+ // FIXME: This does not seem like a reasonable pattern to support, but it
+ // has been seen in the wild coming out of degenerate ARM test cases.
+ TII->RemoveBranch(*this);
+
+ // Finally update the unconditional successor to be reached via a branch if
+ // it would not be reached by fallthrough.
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ return;
+ }
+
+ // We reach here only when exactly one successor is TBB, which cannot fall
+ // through, and any remaining successors are EH pads. In this case, we need
+ // to change the conditional branch into an unconditional branch.
+ TII->RemoveBranch(*this);
+ Cond.clear();
+ TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ return;
+ }
+
+ // The block has a fallthrough conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) {
+ // We can't reverse the condition, add an unconditional branch.
+ Cond.clear();
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ return;
+ }
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ } else if (!isLayoutSuccessor(FallthroughBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
+ }
+}
+
+void MachineBasicBlock::validateSuccProbs() const {
+#ifndef NDEBUG
+ int64_t Sum = 0;
+ for (auto Prob : Probs)
+ Sum += Prob.getNumerator();
+ // Due to precision issues, we assume that the sum of probabilities is one if
+ // the difference between the sum of their numerators and the denominator is
+ // no greater than the number of successors.
+ assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <=
+ Probs.size() &&
+ "The sum of successors's probabilities exceeds one.");
+#endif // NDEBUG
+}
+
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ,
+ BranchProbability Prob) {
+ // The probability list is either empty (if the successor list isn't empty,
+ // this means the optimization is disabled) or has the same size as the
+ // successor list.
+ if (!(Probs.empty() && !Successors.empty()))
+ Probs.push_back(Prob);
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
+ // We need to make sure the probability list is either empty or has the same
+ // size as the successor list. When this function is called, we can safely
+ // delete all probabilities in the list.
+ Probs.clear();
+ Successors.push_back(Succ);
+ Succ->addPredecessor(this);
+}
+
+void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
+ bool NormalizeSuccProbs) {
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ);
+ removeSuccessor(I, NormalizeSuccProbs);
+}
+
+MachineBasicBlock::succ_iterator
+MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) {
+ assert(I != Successors.end() && "Not a current successor!");
+
+ // If the probability list is empty, it means we don't use it (disabled
+ // optimization).
+ if (!Probs.empty()) {
+ probability_iterator WI = getProbabilityIterator(I);
+ Probs.erase(WI);
+ if (NormalizeSuccProbs)
+ normalizeSuccProbs();
+ }
+
+ (*I)->removePredecessor(this);
+ return Successors.erase(I);
+}
+
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ if (Old == New)
+ return;
+
+ succ_iterator E = succ_end();
+ succ_iterator NewI = E;
+ succ_iterator OldI = E;
+ for (succ_iterator I = succ_begin(); I != E; ++I) {
+ if (*I == Old) {
+ OldI = I;
+ if (NewI != E)
+ break;
+ }
+ if (*I == New) {
+ NewI = I;
+ if (OldI != E)
+ break;
+ }
+ }
+ assert(OldI != E && "Old is not a successor of this block");
+
+ // If New isn't already a successor, let it take Old's place.
+ if (NewI == E) {
+ Old->removePredecessor(this);
+ New->addPredecessor(this);
+ *OldI = New;
+ return;
+ }
+
+ // New is already a successor.
+ // Update its probability instead of adding a duplicate edge.
+ if (!Probs.empty()) {
+ auto ProbIter = getProbabilityIterator(NewI);
+ if (!ProbIter->isUnknown())
+ *ProbIter += *getProbabilityIterator(OldI);
+ }
+ removeSuccessor(OldI);
+}
+
+void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
+ Predecessors.push_back(Pred);
+}
+
+void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) {
+ pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred);
+ assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
+ Predecessors.erase(I);
+}
+
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
+ return;
+
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
+
+ // If the probability list is empty, it means we don't use it (disabled optimization).
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
+
+ FromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
+ if (this == FromMBB)
+ return;
+
+ while (!FromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *FromMBB->succ_begin();
+ if (!FromMBB->Probs.empty()) {
+ auto Prob = *FromMBB->Probs.begin();
+ addSuccessor(Succ, Prob);
+ } else
+ addSuccessorWithoutProb(Succ);
+ FromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == FromMBB)
+ MO.setMBB(this);
+ }
+ }
+ normalizeSuccProbs();
+}
+
+bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
+ return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+ return std::find(succ_begin(), succ_end(), MBB) != succ_end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+ MachineFunction::const_iterator I(this);
+ return std::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ MachineFunction::iterator Fallthrough = getIterator();
+ ++Fallthrough;
+ // If FallthroughBlock is off the end of the function, it can't fall through.
+ if (Fallthrough == getParent()->end())
+ return false;
+
+ // If FallthroughBlock isn't a successor, no fallthrough is possible.
+ if (!isSuccessor(&*Fallthrough))
+ return false;
+
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+ if (TII->analyzeBranch(*this, TBB, FBB, Cond)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicated check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return empty() || !back().isBarrier() || TII->isPredicated(back());
+ }
+
+ // If there is no branch, control always falls through.
+ if (!TBB) return true;
+
+ // If there is some explicit branch to the fallthrough block, it can obviously
+ // reach, even though the branch should get folded to fall through implicitly.
+ if (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough)
+ return true;
+
+ // If it's an unconditional branch to some block not the fall through, it
+ // doesn't fall through.
+ if (Cond.empty()) return false;
+
+ // Otherwise, if it is conditional and has no explicit false block, it falls
+ // through.
+ return FBB == nullptr;
+}
+
+MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
+ Pass &P) {
+ if (!canSplitCriticalEdge(Succ))
+ return nullptr;
+
+ MachineFunction *MF = getParent();
+ DebugLoc DL; // FIXME: this is nowhere
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "Splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>();
+ SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>();
+ if (LIS)
+ LIS->insertMBBInMaps(NMBB);
+ else if (Indexes)
+ Indexes->insertMBBInMaps(NMBB);
+
+ // On some targets like Mips, branches may kill virtual registers. Make sure
+ // that LiveVariables is properly updated after updateTerminator replaces the
+ // terminators.
+ LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>();
+
+ // Collect a list of virtual registers killed by the terminators.
+ SmallVector<unsigned, 4> KilledRegs;
+ if (LV)
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = &*I;
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0 ||
+ !OI->isUse() || !OI->isKill() || OI->isUndef())
+ continue;
+ unsigned Reg = OI->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ LV->getVarInfo(Reg).removeKill(*MI)) {
+ KilledRegs.push_back(Reg);
+ DEBUG(dbgs() << "Removing terminator kill: " << *MI);
+ OI->setIsKill(false);
+ }
+ }
+ }
+
+ SmallVector<unsigned, 4> UsedRegs;
+ if (LIS) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
+ MachineInstr *MI = &*I;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ if (!OI->isReg() || OI->getReg() == 0)
+ continue;
+
+ unsigned Reg = OI->getReg();
+ if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+
+ // If updateTerminator() removes instructions, we need to remove them from
+ // SlotIndexes.
+ SmallVector<MachineInstr*, 4> Terminators;
+ if (Indexes) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ Terminators.push_back(&*I);
+ }
+
+ updateTerminator();
+
+ if (Indexes) {
+ SmallVector<MachineInstr*, 4> NewTerminators;
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I)
+ NewTerminators.push_back(&*I);
+
+ for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
+ E = Terminators.end(); I != E; ++I) {
+ if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
+ NewTerminators.end())
+ Indexes->removeMachineInstrFromMaps(**I);
+ }
+ }
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+ TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
+
+ if (Indexes) {
+ for (MachineInstr &MI : NMBB->instrs()) {
+ // Some instructions may have been moved to NMBB by updateTerminator(),
+ // so we first remove any instruction that already has an index.
+ if (Indexes->hasIndex(MI))
+ Indexes->removeMachineInstrFromMaps(MI);
+ Indexes->insertMachineInstrInMaps(MI);
+ }
+ }
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ // Inherit live-ins from the successor
+ for (const auto &LI : Succ->liveins())
+ NMBB->addLiveIn(LI);
+
+ // Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ if (LV) {
+ // Restore kills of virtual registers that were killed by the terminators.
+ while (!KilledRegs.empty()) {
+ unsigned Reg = KilledRegs.pop_back_val();
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LV->getVarInfo(Reg).Kills.push_back(&*I);
+ DEBUG(dbgs() << "Restored terminator kill: " << *I);
+ break;
+ }
+ }
+ // Update relevant live-through information.
+ LV->addNewBlock(NMBB, this, Succ);
+ }
+
+ if (LIS) {
+ // After splitting the edge and updating SlotIndexes, live intervals may be
+ // in one of two situations, depending on whether this block was the last in
+ // the function. If the original block was the last in the function, all
+ // live intervals will end prior to the beginning of the new split block. If
+ // the original block was not at the end of the function, all live intervals
+ // will extend to the end of the new split block.
+
+ bool isLastMBB =
+ std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+
+ SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
+ SlotIndex PrevIndex = StartIndex.getPrevSlot();
+ SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB);
+
+ // Find the registers used from NMBB in PHIs in Succ.
+ SmallSet<unsigned, 8> PHISrcRegs;
+ for (MachineBasicBlock::instr_iterator
+ I = Succ->instr_begin(), E = Succ->instr_end();
+ I != E && I->isPHI(); ++I) {
+ for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
+ if (I->getOperand(ni+1).getMBB() == NMBB) {
+ MachineOperand &MO = I->getOperand(ni);
+ unsigned Reg = MO.getReg();
+ PHISrcRegs.insert(Reg);
+ if (MO.isUndef())
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI &&
+ "PHI sources should be live out of their predecessors.");
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ }
+ }
+ }
+
+ MachineRegisterInfo *MRI = &getParent()->getRegInfo();
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
+ continue;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.liveAt(PrevIndex))
+ continue;
+
+ bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ));
+ if (isLiveOut && isLastMBB) {
+ VNInfo *VNI = LI.getVNInfoAt(PrevIndex);
+ assert(VNI && "LiveInterval should have VNInfo where it is live.");
+ LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI));
+ } else if (!isLiveOut && !isLastMBB) {
+ LI.removeSegment(StartIndex, EndIndex);
+ }
+ }
+
+ // Update all intervals for registers whose uses may have been modified by
+ // updateTerminator().
+ LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs);
+ }
+
+ if (MachineDominatorTree *MDT =
+ P.getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->recordSplitCriticalEdge(this, Succ, NMBB);
+
+ if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
+
+bool MachineBasicBlock::canSplitCriticalEdge(
+ const MachineBasicBlock *Succ) const {
+ // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+ // it in this generic function.
+ if (Succ->isEHPad())
+ return false;
+
+ const MachineFunction *MF = getParent();
+
+ // Performance might be harmed on HW that implements branching using an exec
+ // mask, where both sides of a branch are always executed.
+ if (MF->getTarget().requiresStructuredCFG())
+ return false;
+
+ // We may need to update this block's terminator, but we can't do that if
+ // analyzeBranch fails. If this block uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ // analyzeBranch should not modify this block, since we did not allow
+ // modification.
+ if (TII->analyzeBranch(*const_cast<MachineBasicBlock *>(this), TBB, FBB, Cond,
+ /*AllowModify*/ false))
+ return false;
+
+ // Avoid bugpoint weirdness: a block may end with a conditional branch that
+ // jumps to the same MBB in either case. We have duplicate CFG edges in that
+ // case that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return false;
+ }
+ return true;
+}
+
+/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
+/// neighboring instructions so the bundle won't be broken by removing MI.
+static void unbundleSingleMI(MachineInstr *MI) {
+ // Removing the first instruction in a bundle.
+ if (MI->isBundledWithSucc() && !MI->isBundledWithPred())
+ MI->unbundleFromSucc();
+ // Removing the last instruction in a bundle.
+ if (MI->isBundledWithPred() && !MI->isBundledWithSucc())
+ MI->unbundleFromPred();
+ // If MI is not bundled, or if it is internal to a bundle, the neighbor flags
+ // are already fine.
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) {
+ unbundleSingleMI(&*I);
+ return Insts.erase(I);
+}
+
+MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
+ unbundleSingleMI(MI);
+ MI->clearFlag(MachineInstr::BundledPred);
+ MI->clearFlag(MachineInstr::BundledSucc);
+ return Insts.remove(MI);
+}
+
+MachineBasicBlock::instr_iterator
+MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+ assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
+ "Cannot insert instruction with bundle flags");
+ // Set the bundle flags when inserting inside a bundle.
+ if (I != instr_end() && I->isBundledWithPred()) {
+ MI->setFlag(MachineInstr::BundledPred);
+ MI->setFlag(MachineInstr::BundledSucc);
+ }
+ return Insts.insert(I, MI);
+}
+
+/// This method unlinks 'this' from the containing function, and returns it, but
+/// does not delete it.
+MachineBasicBlock *MachineBasicBlock::removeFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->remove(this);
+ return this;
+}
+
+/// This method unlinks 'this' from the containing function, and deletes it.
+void MachineBasicBlock::eraseFromParent() {
+ assert(getParent() && "Not embedded in a function!");
+ getParent()->erase(this);
+}
+
+/// Given a machine basic block that branched to 'Old', change the code and CFG
+/// so that it branches to 'New' instead.
+void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Cannot replace self with self!");
+
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
+ --I;
+ if (!I->isTerminator()) break;
+
+ // Scan the operands of this machine instruction, replacing any uses of Old
+ // with New.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (I->getOperand(i).isMBB() &&
+ I->getOperand(i).getMBB() == Old)
+ I->getOperand(i).setMBB(New);
+ }
+
+ // Update the successor information.
+ replaceSuccessor(Old, New);
+}
+
+/// Various pieces of code can cause excess edges in the CFG to be inserted. If
+/// we have proven that MBB can only branch to DestA and DestB, remove any other
+/// MBB successors from the CFG. DestA and DestB can be null.
+///
+/// Besides DestA and DestB, retain other edges leading to LandingPads
+/// (currently there can be only one; we don't check or require that here).
+/// Note it is possible that DestA and/or DestB are LandingPads.
+bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+ MachineBasicBlock *DestB,
+ bool IsCond) {
+ // The values of DestA and DestB frequently come from a call to the
+ // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial
+ // values from there.
+ //
+ // 1. If both DestA and DestB are null, then the block ends with no branches
+ // (it falls through to its successor).
+ // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends
+ // with only an unconditional branch.
+ // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends
+ // with a conditional branch that falls through to a successor (DestB).
+ // 4. If DestA and DestB are set and IsCond is true, then the block ends with a
+ // conditional branch followed by an unconditional branch. DestA is the
+ // 'true' destination and DestB is the 'false' destination.
+
+ bool Changed = false;
+
+ MachineFunction::iterator FallThru = std::next(getIterator());
+
+ if (!DestA && !DestB) {
+ // Block falls through to successor.
+ DestA = &*FallThru;
+ DestB = &*FallThru;
+ } else if (DestA && !DestB) {
+ if (IsCond)
+ // Block ends in conditional jump that falls through to successor.
+ DestB = &*FallThru;
+ } else {
+ assert(DestA && DestB && IsCond &&
+ "CFG in a bad state. Cannot correct CFG edges");
+ }
+
+ // Remove superfluous edges. I.e., those which aren't destinations of this
+ // basic block, duplicate edges, or landing pads.
+ SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
+ MachineBasicBlock::succ_iterator SI = succ_begin();
+ while (SI != succ_end()) {
+ const MachineBasicBlock *MBB = *SI;
+ if (!SeenMBBs.insert(MBB).second ||
+ (MBB != DestA && MBB != DestB && !MBB->isEHPad())) {
+ // This is a superfluous edge, remove it.
+ SI = removeSuccessor(SI);
+ Changed = true;
+ } else {
+ ++SI;
+ }
+ }
+
+ if (Changed)
+ normalizeSuccProbs();
+ return Changed;
+}
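A hypothetical illustration of the four analyzeBranch cases listed in the comment at the top of this function; the branch mnemonics and block numbers are invented for the example.

// Case 1: fallthrough only             -> DestA = null, DestB = null, IsCond = false
// Case 2: "B BB#3"                     -> DestA = BB#3, DestB = null, IsCond = false
// Case 3: "Bcc BB#2", then fallthrough -> DestA = BB#2, DestB = null, IsCond = true
// Case 4: "Bcc BB#2", then "B BB#3"    -> DestA = BB#2, DestB = BB#3, IsCond = true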
+
+/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
+/// instructions. Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
+ DebugLoc DL;
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug values; we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
+ return DL;
+}
+
+/// Return probability of the edge from this block to MBB.
+BranchProbability
+MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
+ if (Probs.empty())
+ return BranchProbability(1, succ_size());
+
+ const auto &Prob = *getProbabilityIterator(Succ);
+ if (Prob.isUnknown()) {
+ // For unknown probabilities, collect the sum of all known ones, and evenly
+ // distribute the complement of the sum to each unknown probability.
+ unsigned KnownProbNum = 0;
+ auto Sum = BranchProbability::getZero();
+ for (auto &P : Probs) {
+ if (!P.isUnknown()) {
+ Sum += P;
+ KnownProbNum++;
+ }
+ }
+ return Sum.getCompl() / (Probs.size() - KnownProbNum);
+ } else
+ return Prob;
+}
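A short worked example of the unknown-probability path above, with numbers chosen purely for illustration.

// Three successors with recorded probabilities {1/2, unknown, unknown}:
// the known sum is 1/2, so each unknown edge reports
// (1 - 1/2) / 2 = 1/4, and the three edges still sum to one.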
+
+/// Set successor probability of a given iterator.
+void MachineBasicBlock::setSuccProbability(succ_iterator I,
+ BranchProbability Prob) {
+ assert(!Prob.isUnknown());
+ if (Probs.empty())
+ return;
+ *getProbabilityIterator(I) = Prob;
+}
+
+/// Return the probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::const_probability_iterator
+MachineBasicBlock::getProbabilityIterator(
+ MachineBasicBlock::const_succ_iterator I) const {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
+}
+
+/// Return the probability iterator corresponding to the I successor iterator.
+MachineBasicBlock::probability_iterator
+MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Probs.size() == Successors.size() && "Async probability list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Probs.size() && "Not a current successor!");
+ return Probs.begin() + index;
+}
+
+/// Return whether (physical) register "Reg" has been <def>ined and not
+/// <kill>ed as of just before "Before".
+///
+/// The search is localized to a neighborhood of Neighborhood instructions
+/// before (searching for defs or kills) and Neighborhood instructions after
+/// (searching just for defs) Before.
+MachineBasicBlock::LivenessQueryResult
+MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
+ unsigned Reg, const_iterator Before,
+ unsigned Neighborhood) const {
+ unsigned N = Neighborhood;
+
+ // Start by searching backwards from Before, looking for kills, reads or defs.
+ const_iterator I(Before);
+ // If this is the first insn in the block, don't search backwards.
+ if (I != begin()) {
+ do {
+ --I;
+
+ MachineOperandIteratorBase::PhysRegInfo Info =
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+
+ // Defs happen after uses so they take precedence if both are present.
+
+ // Register is dead after a dead def of the full register.
+ if (Info.DeadDef)
+ return LQR_Dead;
+ // Register is (at least partially) live after a def.
+ if (Info.Defined) {
+ if (!Info.PartialDeadDef)
+ return LQR_Live;
+ // As soon as we saw a partial definition (dead or not),
+ // we cannot tell if the value is partial live without
+ // tracking the lanemasks. We are not going to do this,
+ // so fall back on the remaining of the analysis.
+ break;
+ }
+ // Register is dead after a full kill or clobber and no def.
+ if (Info.Killed || Info.Clobbered)
+ return LQR_Dead;
+ // Register must be live if we read it.
+ if (Info.Read)
+ return LQR_Live;
+ } while (I != begin() && --N > 0);
+ }
+
+ // Did we get to the start of the block?
+ if (I == begin()) {
+ // If so, the register's state is definitely defined by the live-in state.
+ for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid();
+ ++RAI)
+ if (isLiveIn(*RAI))
+ return LQR_Live;
+
+ return LQR_Dead;
+ }
+
+ N = Neighborhood;
+
+ // Try searching forwards from Before, looking for reads or defs.
+ I = const_iterator(Before);
+ // If this is the last insn in the block, don't search forwards.
+ if (I != end()) {
+ for (++I; I != end() && N > 0; ++I, --N) {
+ MachineOperandIteratorBase::PhysRegInfo Info =
+ ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+
+ // Register is live when we read it here.
+ if (Info.Read)
+ return LQR_Live;
+ // Register is dead if we can fully overwrite or clobber it here.
+ if (Info.FullyDefined || Info.Clobbered)
+ return LQR_Dead;
+ }
+ }
+
+ // At this point we have no idea of the liveness of the register.
+ return LQR_Unknown;
+}
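A hedged sketch of a typical caller of the liveness query above, for example a peephole-style pass probing whether a scratch register can be clobbered; ScratchReg and the helper name are placeholders.

// Illustrative sketch; not part of this patch.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"

static bool isDeadHere(const llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::const_iterator Before,
                       unsigned ScratchReg,
                       const llvm::TargetRegisterInfo *TRI) {
  // Only LQR_Dead proves the register is free; LQR_Unknown means the local
  // scan of the surrounding instructions was inconclusive.
  return MBB.computeRegisterLiveness(TRI, ScratchReg, Before,
                                     /*Neighborhood=*/10) ==
         llvm::MachineBasicBlock::LQR_Dead;
}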
+
+const uint32_t *
+MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const {
+ // EH funclet entry does not preserve any registers.
+ return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr;
+}
+
+const uint32_t *
+MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
+ // If we see a return block with successors, this must be a funclet return,
+ // which does not preserve any registers. If there are no successors, we don't
+ // care what kind of return it is, putting a mask after it is a no-op.
+ return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
new file mode 100644
index 000000000000..6c0f99fa111e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -0,0 +1,200 @@
+//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "block-freq"
+
+#ifndef NDEBUG
+
+static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
+ "view-machine-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how machine block "
+ "frequencies propagate through the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count", "display a graph using the real "
+ "profile count if available."),
+
+ clEnumValEnd));
+
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+extern cl::opt<unsigned> ViewHotFreqPercent;
+
+namespace llvm {
+
+template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
+ typedef const MachineBasicBlock NodeType;
+ typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
+ typedef MachineFunction::const_iterator nodes_iterator;
+
+ static inline const NodeType *
+ getEntryNode(const MachineBlockFrequencyInfo *G) {
+ return &G->getFunction()->front();
+ }
+
+ static ChildIteratorType child_begin(const NodeType *N) {
+ return N->succ_begin();
+ }
+
+ static ChildIteratorType child_end(const NodeType *N) {
+ return N->succ_end();
+ }
+
+ static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+
+ static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) {
+ return G->getFunction()->end();
+ }
+};
+
+typedef BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
+ MachineBranchProbabilityInfo>
+ MBFIDOTGraphTraitsBase;
+template <>
+struct DOTGraphTraits<MachineBlockFrequencyInfo *>
+ : public MBFIDOTGraphTraitsBase {
+ explicit DOTGraphTraits(bool isSimple = false)
+ : MBFIDOTGraphTraitsBase(isSimple) {}
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineBlockFrequencyInfo *Graph) {
+ return MBFIDOTGraphTraitsBase::getNodeLabel(
+ Node, Graph, ViewMachineBlockFreqPropagationDAG);
+ }
+
+ std::string getNodeAttributes(const MachineBasicBlock *Node,
+ const MachineBlockFrequencyInfo *Graph) {
+ return MBFIDOTGraphTraitsBase::getNodeAttributes(Node, Graph,
+ ViewHotFreqPercent);
+ }
+
+ std::string getEdgeAttributes(const MachineBasicBlock *Node, EdgeIter EI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return MBFIDOTGraphTraitsBase::getEdgeAttributes(
+ Node, EI, MBFI, MBFI->getMBPI(), ViewHotFreqPercent);
+ }
+};
+
+} // end namespace llvm
+#endif
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequencyInfo::ID = 0;
+
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
+ : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {}
+
+void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI =
+ getAnalysis<MachineBranchProbabilityInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ if (!MBFI)
+ MBFI.reset(new ImplType);
+ MBFI->calculate(F, MBPI, MLI);
+#ifndef NDEBUG
+ if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
+ view();
+ }
+#endif
+ return false;
+}
+
+void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void MachineBlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+ ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this),
+ "MachineBlockFrequencyDAGs");
+#else
+ errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
+ "on systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+BlockFrequency
+MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
+ return MBFI ? MBFI->getBlockFreq(MBB) : 0;
+}
+
+Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
+ const MachineBasicBlock *MBB) const {
+  if (!MBFI)
+    return None;
+  const Function *F = MBFI->getFunction()->getFunction();
+  return MBFI->getBlockProfileCount(*F, MBB);
+}
+
+const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
+ return MBFI ? MBFI->getFunction() : nullptr;
+}
+
+const MachineBranchProbabilityInfo *MachineBlockFrequencyInfo::getMBPI() const {
+ return MBFI ? &MBFI->getBPI() : nullptr;
+}
+
+raw_ostream &
+MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS;
+}
+
+raw_ostream &
+MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS;
+}
+
+uint64_t MachineBlockFrequencyInfo::getEntryFreq() const {
+ return MBFI ? MBFI->getEntryFreq() : 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 000000000000..03dda8b36a71
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1796 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "block-placement"
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks in the function."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> AlignAllNonFallThruBlocks(
+ "align-all-nofallthru-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks that have no fall-through predecessors (i.e. don't add "
+ "nops that are executed)."),
+ cl::init(0), cl::Hidden);
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned> ExitBlockBias(
+ "block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<bool> OutlineOptionalBranches(
+ "outline-optional-branches",
+ cl::desc("Put completely optional branches, i.e. branches with a common "
+ "post dominator, out of line."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> OutlineOptionalThreshold(
+ "outline-optional-threshold",
+ cl::desc("Don't outline optional branches that are a single block with an "
+ "instruction count below this threshold"),
+ cl::init(4), cl::Hidden);
+
+static cl::opt<unsigned> LoopToColdBlockRatio(
+ "loop-to-cold-block-ratio",
+ cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
+ "(frequency of block) is greater than this ratio"),
+ cl::init(5), cl::Hidden);
+
+static cl::opt<bool>
+ PreciseRotationCost("precise-rotation-cost",
+ cl::desc("Model the cost of loop rotation more "
+ "precisely by using profile data."),
+ cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ ForcePreciseRotationCost("force-precise-rotation-cost",
+ cl::desc("Force the use of precise cost "
+ "loop rotation strategy."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> MisfetchCost(
+ "misfetch-cost",
+ cl::desc("Cost that models the probabilistic risk of an instruction "
+ "misfetch due to a jump comparing to falling through, whose cost "
+ "is zero."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
+ cl::desc("Cost of jump instructions."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<bool>
+BranchFoldPlacement("branch-fold-placement",
+ cl::desc("Perform branch folding during placement. "
+ "Reduces code size."),
+ cl::init(true), cl::Hidden);
+
+extern cl::opt<unsigned> StaticLikelyProb;
+extern cl::opt<unsigned> ProfileLikelyProb;
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the datastructure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities and code locality. We also can use a block chain to represent
+/// a sequence of basic blocks which have some external (correctness)
+/// requirement for sequential layout.
+///
+/// Chains can be built around a single basic block and can be merged to grow
+/// them. They participate in a block-to-chain mapping, which is updated
+/// automatically as chains are merged together.
+class BlockChain {
+ /// \brief The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// \brief A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// \brief Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// \brief Iterator over blocks within the chain.
+ typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
+
+ /// \brief Beginning of blocks within the chain.
+ iterator begin() { return Blocks.begin(); }
+
+ /// \brief End of blocks within the chain.
+ iterator end() { return Blocks.end(); }
+
+ /// \brief Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks, updating the edge list, and
+ /// updating the block -> chain mapping. It does not free or tear down the
+ /// old chain, but the old chain's block list is no longer valid.
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB);
+ assert(!Blocks.empty());
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB]);
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin());
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (MachineBasicBlock *ChainBB : *Chain) {
+ Blocks.push_back(ChainBB);
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ BlockToChain[ChainBB] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// \brief Dump the blocks in this chain.
+ LLVM_DUMP_METHOD void dump() {
+ for (MachineBasicBlock *MBB : *this)
+ MBB->dump();
+ }
+#endif // NDEBUG
+
+ /// \brief Count of predecessors of any block within the chain which have not
+ /// yet been scheduled. In general, we will delay scheduling this chain
+ /// until those predecessors are scheduled (or we find a sufficiently good
+ /// reason to override this heuristic.) Note that when forming loop chains,
+ /// blocks outside the loop are ignored and treated as if they were already
+ /// scheduled.
+ ///
+ /// Note: This field is reinitialized multiple times - once for each loop,
+ /// and then once for the function as a whole.
+ unsigned UnscheduledPredecessors;
+};
+}
+
+namespace {
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// \brief A typedef for a block filter set.
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+
+  /// \brief Work lists of blocks that are ready to be laid out.
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+
+ /// \brief Machine Function
+ MachineFunction *F;
+
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
+
+ /// \brief A handle to the loop info.
+ MachineLoopInfo *MLI;
+
+ /// \brief A handle to the target's instruction info.
+ const TargetInstrInfo *TII;
+
+ /// \brief A handle to the target's lowering info.
+ const TargetLoweringBase *TLI;
+
+  /// \brief A handle to the dominator tree.
+ MachineDominatorTree *MDT;
+
+  /// \brief A set of blocks that are unavoidably executed, i.e. they
+  /// dominate all terminators of the MachineFunction.
+ SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
+
+ /// \brief Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily while processing the loop structure of
+ /// a function. To reduce malloc traffic, we allocate them using this
+ /// slab-like allocator, and destroy them after the pass completes. An
+ /// important guarantee is that this allocator produces stable pointers to
+ /// the chains.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// \brief Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+
+ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
+ BranchProbability
+ collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ BranchProbability SuccProb,
+ BranchProbability HotProb);
+ bool
+ hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ selectBestCandidateBlock(BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+
+ /// \brief Add a basic block to the work list if it is appropriate.
+ ///
+  /// If the optional parameter BlockFilter is provided, only MBBs
+ /// present in the set will be added to the worklist. If nullptr
+ /// is provided, no filtering occurs.
+ void fillWorkLists(MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter);
+ void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter = nullptr);
+ MachineBasicBlock *findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFilterSet collectLoopBlockSet(MachineLoop &L);
+ void buildLoopChains(MachineLoop &L);
+ void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void collectMustExecuteBBs();
+ void buildCFGChains();
+ void optimizeBranches();
+ void alignBlocks();
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacement::ID = 0;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// \brief Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
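+/// Produces strings like "BB#2 ('if.then')".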
+static std::string getBlockName(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber();
+ OS << " ('" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// \brief Mark a chain's successors as having one fewer preds.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
+void MachineBlockPlacement::markChainSuccessors(
+ BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (MachineBasicBlock *MBB : Chain) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+      auto *NewBB = *SuccChain.begin();
+      if (NewBB->isEHPad())
+        EHPadWorkList.push_back(NewBB);
+      else
+        BlockWorkList.push_back(NewBB);
+ }
+ }
+}
+
+/// This helper function collects the set of successors of block
+/// \p BB that are allowed to be its layout successors, and return
+/// the total branch probability of edges from \p BB to those
+/// blocks.
+BranchProbability MachineBlockPlacement::collectViableSuccessors(
+ MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors) {
+  // Adjust edge probabilities by excluding edges pointing to blocks that are
+  // either not in BlockFilter or are already in the current chain. Consider
+  // the following CFG:
+ //
+ // --->A
+ // | / \
+ // | B C
+ // | \ / \
+ // ----D E
+ //
+ // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+ // A->C is chosen as a fall-through, D won't be selected as a successor of C
+ // due to CFG constraint (the probability of C->D is not greater than
+  // HotProb to break topo-order). If we exclude E, which is not in
+  // BlockFilter, when calculating the probability of C->D, D will be selected
+  // and we will get A C D B as the layout of this loop.
+ auto AdjustedSumProb = BranchProbability::getOne();
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ bool SkipSucc = false;
+ if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
+ SkipSucc = true;
+ } else {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (SuccChain == &Chain) {
+ SkipSucc = true;
+ } else if (Succ != *SuccChain->begin()) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n");
+ continue;
+ }
+ }
+ if (SkipSucc)
+ AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
+ else
+ Successors.push_back(Succ);
+ }
+
+ return AdjustedSumProb;
+}
+
+/// The helper function returns the branch probability that is adjusted
+/// or normalized over the new total \p AdjustedSumProb.
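+/// For example, if the original edge probability is 30% but the viable
+/// successors together account for only 60% of the outgoing probability,
+/// the adjusted probability becomes 30% / 60% = 50%.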
+static BranchProbability
+getAdjustedProbability(BranchProbability OrigProb,
+ BranchProbability AdjustedSumProb) {
+ BranchProbability SuccProb;
+ uint32_t SuccProbN = OrigProb.getNumerator();
+ uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+ if (SuccProbN >= SuccProbD)
+ SuccProb = BranchProbability::getOne();
+ else
+ SuccProb = BranchProbability(SuccProbN, SuccProbD);
+
+ return SuccProb;
+}
+
+/// When the option OutlineOptionalBranches is on, this method
+/// checks if the fallthrough candidate block \p Succ (of block
+/// \p BB) also has other unscheduled predecessor blocks which
+/// are also successors of \p BB (forming triangular shape CFG).
+/// If none of those predecessors is small, it returns true.
+/// The caller can choose to select \p Succ as the layout successor
+/// so that \p Succ's predecessors (optional branches) can be
+/// outlined.
+/// FIXME: fold this with more general layout cost analysis.
+bool MachineBlockPlacement::shouldPredBlockBeOutlined(
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
+ BranchProbability HotProb) {
+ if (!OutlineOptionalBranches)
+ return false;
+ // If we outline optional branches, look whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
+ return false;
+ }
+ return true;
+ } else
+ return false;
+}
+
+// When profile is not present, return the StaticLikelyProb.
+// When profile is available, we need to handle the triangle-shape CFG.
+static BranchProbability getLayoutSuccessorProbThreshold(
+ MachineBasicBlock *BB) {
+ if (!BB->getParent()->getFunction()->getEntryCount())
+ return BranchProbability(StaticLikelyProb, 100);
+ if (BB->succ_size() == 2) {
+ const MachineBasicBlock *Succ1 = *BB->succ_begin();
+ const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
+ if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
+      /* See case 1 below for the cost analysis. For BB->Succ to
+       * be taken with smaller cost, the following needs to hold:
+       *   Prob(BB->Succ) > 2 * Prob(BB->Pred)
+       * Since Prob(BB->Succ) + Prob(BB->Pred) == 1, the threshold T on
+       * Prob(BB->Succ) satisfies T = 2 * (1 - T), i.e. T = 2/3. Scaling by
+       * the user-specified branch bias ProfileLikelyProb (in percent, where
+       * 50 means no bias), we have
+       *   T = (2/3) * (ProfileLikelyProb / 50)
+       *     = (2 * ProfileLikelyProb) / 150
+       */
+ return BranchProbability(2 * ProfileLikelyProb, 150);
+ }
+ }
+ return BranchProbability(ProfileLikelyProb, 100);
+}
+
+/// Checks to see if the layout candidate block \p Succ has a better layout
+/// predecessor than \c BB. If yes, returns true.
+bool MachineBlockPlacement::hasBetterLayoutPredecessor(
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
+ BranchProbability SuccProb, BranchProbability RealSuccProb,
+ BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+
+ // There isn't a better layout when there are no unscheduled predecessors.
+ if (SuccChain.UnscheduledPredecessors == 0)
+ return false;
+
+ // There are two basic scenarios here:
+ // -------------------------------------
+ // Case 1: triangular shape CFG (if-then):
+ // BB
+ // | \
+ // | \
+ // | Pred
+ // | /
+ // Succ
+  // In this case, we are evaluating whether to select edge BB->Succ, i.e.
+  // set Succ as the layout successor of BB. Picking Succ as BB's layout
+  // successor breaks the CFG constraints (FIXME: define these constraints).
+  // With this layout, block Pred is forced to be outlined, so the overall
+  // cost will be the cost of the taken branch from BB to Pred, plus the cost
+  // of the taken back branch from Pred to Succ, as well as the additional
+  // cost associated with the needed unconditional jump instruction from Pred
+  // to Succ.
+
+ // The cost of the topological order layout is the taken branch cost
+ // from BB to Succ, so to make BB->Succ a viable candidate, the following
+ // must hold:
+ // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
+ // < freq(BB->Succ) * taken_branch_cost.
+ // Ignoring unconditional jump cost, we get
+ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
+ // prob(BB->Succ) > 2 * prob(BB->Pred)
+ //
+ // When real profile data is available, we can precisely compute the
+ // probability threshold that is needed for edge BB->Succ to be considered.
+ // Without profile data, the heuristic requires the branch bias to be
+ // a lot larger to make sure the signal is very strong (e.g. 80% default).
+ // -----------------------------------------------------------------
+ // Case 2: diamond like CFG (if-then-else):
+ // S
+ // / \
+ // | \
+ // BB Pred
+ // \ /
+ // Succ
+ // ..
+ //
+ // The current block is BB and edge BB->Succ is now being evaluated.
+ // Note that edge S->BB was previously already selected because
+ // prob(S->BB) > prob(S->Pred).
+ // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
+ // choose Pred, we will have a topological ordering as shown on the left
+ // in the picture below. If we choose Succ, we have the solution as shown
+ // on the right:
+ //
+ // topo-order:
+ //
+ // S----- ---S
+ // | | | |
+ // ---BB | | BB
+ // | | | |
+ // | pred-- | Succ--
+ // | | | |
+ // ---succ ---pred--
+ //
+ // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred)
+ // = freq(S->Pred) + freq(S->BB)
+ //
+  // If we have profile data (i.e., branch probabilities can be trusted), the
+ // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
+ // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
+ // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
+ // means the cost of topological order is greater.
+  // When profile data is not available, however, we need to be more
+  // conservative. If the branch prediction is wrong, breaking the topo-order
+  // will actually yield a layout with large cost. For this reason, we need a
+  // strongly biased branch at block S with Prob(S->BB) in order to select
+  // BB->Succ. This is equivalent to looking at the CFG backward along the
+  // backward edge: Prob(Succ->BB) needs to be >= HotProb in order to be
+  // selected (without profile data).
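+  //
+  // For example, without profile data and with the default 80% static
+  // threshold, BB->Succ in case 1 is only allowed to break the topological
+  // order when its adjusted probability is at least 80%, i.e. when the
+  // adjusted prob(BB->Pred) is at most 20%.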
+
+ BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
+
+ // Forward checking. For case 2, SuccProb will be 1.
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob) (CFG conflict)\n");
+ return true;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more
+ // important predecessor.
+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
+ bool BadCFGConflict = false;
+
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+ (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+    // Do backward checking. For case 1, it is actually a redundant check. For
+    // case 2 above, we need backward checking to filter out edges that are
+ // not 'strongly' biased. With profile data available, the check is mostly
+ // redundant too (when threshold prob is set at 50%) unless S has more than
+ // two successors.
+ // BB Pred
+ // \ /
+ // Succ
+ // We select edge BB->Succ if
+ // freq(BB->Succ) > freq(Succ) * HotProb
+ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
+ // HotProb
+    // i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+ BlockFrequency PredEdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob) (non-cold CFG conflict)\n");
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(StaticLikelyProb, 100);
+
+ MachineBasicBlock *BestSucc = nullptr;
+ auto BestProb = BranchProbability::getZero();
+
+ SmallVector<MachineBasicBlock *, 4> Successors;
+ auto AdjustedSumProb =
+ collectViableSuccessors(BB, Chain, BlockFilter, Successors);
+
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock *Succ : Successors) {
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
+ BranchProbability SuccProb =
+ getAdjustedProbability(RealSuccProb, AdjustedSumProb);
+
+ // This heuristic is off by default.
+ if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
+ HotProb))
+ return Succ;
+
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Skip the edge \c BB->Succ if block \c Succ has a better layout
+ // predecessor that yields lower global cost.
+ if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
+ Chain, BlockFilter))
+ continue;
+
+ DEBUG(
+ dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestProb >= SuccProb)
+ continue;
+ BestSucc = Succ;
+ BestProb = SuccProb;
+ }
+ return BestSucc;
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and selects the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve i-cache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
+ // Once we need to walk the worklist looking for a candidate, cleanup the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
+ WorkList.end());
+
+ if (WorkList.empty())
+ return nullptr;
+
+ bool IsEHPad = WorkList[0]->isEHPad();
+
+ MachineBasicBlock *BestBlock = nullptr;
+ BlockFrequency BestFreq;
+ for (MachineBasicBlock *MBB : WorkList) {
+ assert(MBB->isEHPad() == IsEHPad);
+
+ BlockChain &SuccChain = *BlockToChain[MBB];
+ if (&SuccChain == &Chain)
+ continue;
+
+    assert(SuccChain.UnscheduledPredecessors == 0 &&
+           "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+
+    // For EH pads, we lay out the least probable ones first so as to avoid
+    // jumping back from less probable landing pads to more probable ones.
+ //
+ // FIXME: Using probability is probably (!) not the best way to achieve
+ // this. We should probably have a more principled approach to layout
+ // cleanup code.
+ //
+ // The goal is to get:
+ //
+ // +--------------------------+
+ // | V
+ // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume
+ //
+ // Rather than:
+ //
+ // +-------------------------------------+
+ // V |
+ // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup
+ if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
+ continue;
+
+ BestBlock = MBB;
+ BestFreq = CandidateFreq;
+ }
+
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// PrevUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(&*I))
+ continue;
+ if (BlockToChain[&*I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[&*I]->begin();
+ }
+ }
+ return nullptr;
+}
+
+void MachineBlockPlacement::fillWorkLists(
+ MachineBasicBlock *MBB,
+ SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
+ const BlockFilterSet *BlockFilter = nullptr) {
+ BlockChain &Chain = *BlockToChain[MBB];
+ if (!UpdatedPreds.insert(&Chain).second)
+ return;
+
+ assert(Chain.UnscheduledPredecessors == 0);
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockFilter && !BlockFilter->count(Pred))
+ continue;
+ if (BlockToChain[Pred] == &Chain)
+ continue;
+ ++Chain.UnscheduledPredecessors;
+ }
+ }
+
+ if (Chain.UnscheduledPredecessors != 0)
+ return;
+
+ MBB = *Chain.begin();
+ if (MBB->isEHPad())
+ EHPadWorkList.push_back(MBB);
+ else
+ BlockWorkList.push_back(MBB);
+}
+
+void MachineBlockPlacement::buildChain(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ assert(BB && "BB must not be null.\n");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
+ MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
+
+ MachineBasicBlock *LoopHeaderBB = BB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
+ BB = *std::prev(Chain.end());
+ for (;;) {
+ assert(BB && "null block found at end of chain in loop.");
+ assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
+ assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Place this block, updating the datastructures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+    // Zero out UnscheduledPredecessors for the successor we're about to merge
+    // in case we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.UnscheduledPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
+ << getBlockName(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *std::prev(Chain.end());
+ }
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockName(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// Look for a block which is strictly better than the loop header for laying
+/// out at the top of the loop. This looks for one and only one pattern:
+/// a latch block with no conditional exit. This block will cause a conditional
+/// jump around it or will be the bottom of the loop if we lay it out in place,
+/// but if it doesn't end up at the bottom of the loop for any reason,
+/// rotation alone won't fix it. Because such a block will always result in an
+/// unconditional jump (for the backedge), rotating it in front of the loop
+/// header is always profitable.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Check that the header hasn't been fused with a preheader block due to
+ // crazy branches. If it has, we need to start with the header at the top to
+ // prevent pulling the preheader into the loop body.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
+ << "\n");
+
+ BlockFrequency BestPredFreq;
+ MachineBasicBlock *BestPred = nullptr;
+ for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
+ << Pred->succ_size() << " successors, ";
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ if (Pred->succ_size() > 1)
+ continue;
+
+ BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ if (!BestPred || PredFreq > BestPredFreq ||
+ (!(PredFreq < BestPredFreq) &&
+ Pred->isLayoutSuccessor(L.getHeader()))) {
+ BestPred = Pred;
+ BestPredFreq = PredFreq;
+ }
+ }
+
+  // If no suitable direct predecessor was found, just use the loop header.
+ if (!BestPred) {
+ DEBUG(dbgs() << " final top unchanged\n");
+ return L.getHeader();
+ }
+
+ // Walk backwards through any straight line of predecessors.
+ while (BestPred->pred_size() == 1 &&
+ (*BestPred->pred_begin())->succ_size() == 1 &&
+ *BestPred->pred_begin() != L.getHeader())
+ BestPred = *BestPred->pred_begin();
+
+ DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n");
+ return BestPred;
+}
+
+/// \brief Find the best loop exiting block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to use as the exiting block so that it can be rotated to the bottom
+/// of the loop. Typically this is done to maximize fallthrough opportunities
+/// out of the loop.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+  // header has been pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return nullptr;
+
+ BlockFrequency BestExitEdgeFreq;
+ unsigned BestExitLoopDepth = 0;
+ MachineBasicBlock *ExitingBB = nullptr;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
+ << "\n");
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[MBB];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an unanalyzable branch.
+ if (MBB != *std::prev(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ bool HasLoopingSucc = false;
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isEHPad())
+ continue;
+ if (Succ == MBB)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain) {
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
+ continue;
+ }
+
+ auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
+ if (LoopBlockSet.count(Succ)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccProb << ")\n");
+ HasLoopingSucc = true;
+ continue;
+ }
+
+ unsigned SuccLoopDepth = 0;
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
+ SuccLoopDepth = ExitLoop->getLoopDepth();
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(MBB);
+ }
+
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ // Note that we bias this toward an existing layout successor to retain
+ // incoming order in the absence of better information. The exit must have
+ // a frequency higher than the current exit before we consider breaking
+ // the layout.
+ BranchProbability Bias(100 - ExitBlockBias, 100);
+ if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
+ ExitEdgeFreq > BestExitEdgeFreq ||
+ (MBB->isLayoutSuccessor(Succ) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = MBB;
+ }
+ }
+
+ if (!HasLoopingSucc) {
+ // Restore the old exiting state, no viable looping successor was found.
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ }
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return nullptr;
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return nullptr;
+
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ return ExitingBB;
+}
+
+/// \brief Attempt to rotate an exiting block to the bottom of the loop.
+///
+/// Once we have built a chain, try to rotate it to line up the hot exit block
+/// with fallthrough out of the loop if doing so doesn't introduce unnecessary
+/// branches. For example, if the loop has fallthrough into its header and out
+/// of its bottom already, don't rotate it.
+void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
+ MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet) {
+ if (!ExitingBB)
+ return;
+
+ MachineBasicBlock *Top = *LoopChain.begin();
+ bool ViableTopFallthrough = false;
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ ViableTopFallthrough = true;
+ break;
+ }
+ }
+
+ // If the header has viable fallthrough, check whether the current loop
+ // bottom is a viable exiting block. If so, bail out as rotating will
+ // introduce an unnecessary branch.
+ if (ViableTopFallthrough) {
+ MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
+ for (MachineBasicBlock *Succ : Bottom->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin()))
+ return;
+ }
+ }
+
+ BlockChain::iterator ExitIt =
+ std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
+ if (ExitIt == LoopChain.end())
+ return;
+
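+  // Rotate the chain so that ExitingBB ends up at the bottom. For example,
+  // rotating the chain [Header, A, Exit, B] around the block following Exit
+  // yields [B, Header, A, Exit].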
+ std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
+}
+
+/// \brief Attempt to rotate a loop based on profile data to reduce branch cost.
+///
+/// With profile data, we can determine the cost in terms of missed fall through
+/// opportunities when rotating a loop chain and select the best rotation.
+/// Basically, there are three kinds of cost to consider for each rotation:
+/// 1. The possibly missed fall through edge (if it exists) from BB out of
+/// the loop to the loop header.
+/// 2. The possibly missed fall through edges (if they exist) from the loop
+/// exits to BB out of the loop.
+/// 3. The missed fall through edge (if it exists) from the last BB to the
+/// first BB in the loop chain.
+/// Therefore, the cost for a given rotation is the sum of costs listed above.
+/// We select the best rotation with the smallest cost.
+void MachineBlockPlacement::rotateLoopWithProfile(
+ BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
+ auto HeaderBB = L.getHeader();
+ auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB);
+ auto RotationPos = LoopChain.end();
+
+ BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
+
+ // A utility lambda that scales up a block frequency by dividing it by a
+ // branch probability which is the reciprocal of the scale.
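+  // For example, ScaleBlockFrequency(Freq, MisfetchCost) yields
+  // Freq * MisfetchCost, saturating at the maximum block frequency.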
+ auto ScaleBlockFrequency = [](BlockFrequency Freq,
+ unsigned Scale) -> BlockFrequency {
+ if (Scale == 0)
+ return 0;
+ // Use operator / between BlockFrequency and BranchProbability to implement
+ // saturating multiplication.
+ return Freq / BranchProbability(1, Scale);
+ };
+
+ // Compute the cost of the missed fall-through edge to the loop header if the
+  // chain head is not the loop header. As we only consider natural loops with
+  // a single header, this computation can be done only once.
+ BlockFrequency HeaderFallThroughCost(0);
+ for (auto *Pred : HeaderBB->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ auto EdgeFreq =
+ MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+ auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
+ // If the predecessor has only an unconditional jump to the header, we
+ // need to consider the cost of this jump.
+ if (Pred->succ_size() == 1)
+ FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
+ HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
+ }
+ }
+
+ // Here we collect all exit blocks in the loop, and for each exit we find out
+ // its hottest exit edge. For each loop rotation, we define the loop exit cost
+ // as the sum of frequencies of exit edges we collect here, excluding the exit
+ // edge from the tail of the loop chain.
+ SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
+ for (auto BB : LoopChain) {
+ auto LargestExitEdgeProb = BranchProbability::getZero();
+ for (auto *Succ : BB->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
+ LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
+ }
+ }
+ if (LargestExitEdgeProb > BranchProbability::getZero()) {
+ auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
+ ExitsWithFreq.emplace_back(BB, ExitFreq);
+ }
+ }
+
+  // In this loop we iterate over every block in the loop chain and calculate
+  // the cost assuming the block is the head of the loop chain. When the loop
+  // ends, we should have found the best candidate as the loop chain's head.
+ for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
+ EndIter = LoopChain.end();
+ Iter != EndIter; Iter++, TailIter++) {
+ // TailIter is used to track the tail of the loop chain if the block we are
+ // checking (pointed by Iter) is the head of the chain.
+ if (TailIter == LoopChain.end())
+ TailIter = LoopChain.begin();
+
+ auto TailBB = *TailIter;
+
+ // Calculate the cost by putting this BB to the top.
+ BlockFrequency Cost = 0;
+
+ // If the current BB is the loop header, we need to take into account the
+ // cost of the missed fall through edge from outside of the loop to the
+ // header.
+ if (Iter != HeaderIter)
+ Cost += HeaderFallThroughCost;
+
+ // Collect the loop exit cost by summing up frequencies of all exit edges
+ // except the one from the chain tail.
+ for (auto &ExitWithFreq : ExitsWithFreq)
+ if (TailBB != ExitWithFreq.first)
+ Cost += ExitWithFreq.second;
+
+ // The cost of breaking the once fall-through edge from the tail to the top
+ // of the loop chain. Here we need to consider three cases:
+ // 1. If the tail node has only one successor, then we will get an
+ // additional jmp instruction. So the cost here is (MisfetchCost +
+ // JumpInstCost) * tail node frequency.
+ // 2. If the tail node has two successors, then we may still get an
+ // additional jmp instruction if the layout successor after the loop
+ // chain is not its CFG successor. Note that the more frequently executed
+    //    jmp instruction will be put ahead of the other one. Assume the
+    //    probabilities of those two branches are x and y, where x is the
+    //    probability of the edge to the chain head; then the cost will be
+    //    (x * MisfetchCost + min(x, y) * JumpInstCost) * tail node frequency.
+ // 3. If the tail node has more than two successors (this rarely happens),
+ // we won't consider any additional cost.
+ if (TailBB->isSuccessor(*Iter)) {
+ auto TailBBFreq = MBFI->getBlockFreq(TailBB);
+ if (TailBB->succ_size() == 1)
+ Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
+ MisfetchCost + JumpInstCost);
+ else if (TailBB->succ_size() == 2) {
+ auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
+ auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
+ auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
+ ? TailBBFreq * TailToHeadProb.getCompl()
+ : TailToHeadFreq;
+ Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
+ ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
+ }
+ }
+
+ DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter)
+ << " to the top: " << Cost.getFrequency() << "\n");
+
+ if (Cost < SmallestRotationCost) {
+ SmallestRotationCost = Cost;
+ RotationPos = Iter;
+ }
+ }
+
+ if (RotationPos != LoopChain.end()) {
+ DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
+ << " to the top\n");
+ std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
+ }
+}
+
+/// \brief Collect blocks in the given loop that are to be placed.
+///
+/// When profile data is available, exclude cold blocks from the returned set;
+/// otherwise, collect all blocks in the loop.
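+/// For example, with profile data and the default LoopToColdBlockRatio of 5,
+/// a block is left out of the loop chain when its frequency is (roughly) less
+/// than one fifth of the loop's total entry frequency.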
+MachineBlockPlacement::BlockFilterSet
+MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
+ BlockFilterSet LoopBlockSet;
+
+ // Filter cold blocks off from LoopBlockSet when profile data is available.
+ // Collect the sum of frequencies of incoming edges to the loop header from
+ // outside. If we treat the loop as a super block, this is the frequency of
+  // the loop. Then for each block in the loop, we calculate the ratio between
+  // its frequency and the frequency of the loop. When this ratio is too
+  // small, don't add the block to the loop chain. If there are outer loops,
+  // then this block
+ // will be merged into the first outer loop chain for which this block is not
+ // cold anymore. This needs precise profile data and we only do this when
+ // profile data is available.
+ if (F->getFunction()->getEntryCount()) {
+ BlockFrequency LoopFreq(0);
+ for (auto LoopPred : L.getHeader()->predecessors())
+ if (!L.contains(LoopPred))
+ LoopFreq += MBFI->getBlockFreq(LoopPred) *
+ MBPI->getEdgeProbability(LoopPred, L.getHeader());
+
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
+ if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
+ continue;
+ LoopBlockSet.insert(LoopBB);
+ }
+ } else
+ LoopBlockSet.insert(L.block_begin(), L.block_end());
+
+ return LoopBlockSet;
+}
+
+/// \brief Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (MachineLoop *InnerLoop : L)
+ buildLoopChains(*InnerLoop);
+
+ assert(BlockWorkList.empty());
+ assert(EHPadWorkList.empty());
+ BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
+
+  // Check if we have profile data for this function. If yes, we will rotate
+  // this loop by modeling costs more precisely, which requires profile data
+  // for a better layout.
+ bool RotateLoopWithProfile =
+ ForcePreciseRotationCost ||
+ (PreciseRotationCost && F->getFunction()->getEntryCount());
+
+ // First check to see if there is an obviously preferable top block for the
+ // loop. This will default to the header, but may end up as one of the
+ // predecessors to the header if there is one which will result in strictly
+ // fewer branches in the loop body.
+ // When we use profile data to rotate the loop, this is unnecessary.
+ MachineBasicBlock *LoopTop =
+ RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+
+ // If we selected just the header for the loop top, look for a potentially
+ // profitable exit block in the event that rotating the loop can eliminate
+ // branches by placing an exit edge at the bottom.
+ MachineBasicBlock *ExitingBB = nullptr;
+ if (!RotateLoopWithProfile && LoopTop == L.getHeader())
+ ExitingBB = findBestLoopExit(L, LoopBlockSet);
+
+ BlockChain &LoopChain = *BlockToChain[LoopTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.UnscheduledPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
+
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
+
+ buildChain(LoopTop, LoopChain, &LoopBlockSet);
+
+ if (RotateLoopWithProfile)
+ rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
+ else
+ rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.UnscheduledPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (MachineBasicBlock *ChainBB : LoopChain) {
+ dbgs() << " ... " << getBlockName(ChainBB) << "\n";
+ if (!LoopBlockSet.erase(ChainBB)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
+ }
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(LoopBB) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+/// When OutlineOptionalBranches is on, this method collects BBs that
+/// dominate all terminator blocks of the function \p F.
+void MachineBlockPlacement::collectMustExecuteBBs() {
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : *F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
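+ // For instance, in a hypothetical diamond CFG entry -> {then, else} ->
+ // join -> ret, the only terminator block is ret; entry, join and ret all
+ // dominate it and are therefore unavoidable, while then and else are not.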
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : *F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
+}
+
+void MachineBlockPlacement::buildCFGChains() {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
+ ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain *Chain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI = std::next(FI);
+ MachineBasicBlock *NextBB = &*NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, nullptr);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Turned on with the OutlineOptionalBranches option.
+ collectMustExecuteBBs();
+
+ // Build any loop-based chains.
+ for (MachineLoop *L : *MLI)
+ buildLoopChains(*L);
+
+ assert(BlockWorkList.empty());
+ assert(EHPadWorkList.empty());
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineBasicBlock &MBB : *F)
+ fillWorkLists(&MBB, UpdatedPreds);
+
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ buildChain(&F->front(), FunctionChain);
+
+#ifndef NDEBUG
+ typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+#endif
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineBasicBlock &MBB : *F)
+ FunctionBlockSet.insert(&MBB);
+
+ for (MachineBasicBlock *ChainBB : FunctionChain)
+ if (!FunctionBlockSet.erase(ChainBB)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(RemainingBB) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F->begin();
+ DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n");
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
+ if (InsertPos != MachineFunction::iterator(ChainBB))
+ F->splice(InsertPos, ChainBB);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
+
+ // FIXME: It would be awesome if updateTerminator would just return rather
+ // than assert when the branch cannot be analyzed, in order to remove this
+ // boilerplate.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+
+ // The "PrevBB" is not yet updated to reflect current code layout, so,
+ // o. it may fall-through to a block without explicit "goto" instruction
+ // before layout, and no longer fall-through it after layout; or
+ // o. just opposite.
+ //
+ // analyzeBranch() may return erroneous value for FBB when these two
+ // situations take place. For the first scenario FBB is mistakenly set NULL;
+ // for the 2nd scenario, the FBB, which is expected to be NULL, is
+ // mistakenly pointing to "*BI".
+ // Thus, if the future change needs to use FBB before the layout is set, it
+ // has to correct FBB first by using the code similar to the following:
+ //
+ // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
+ // PrevBB->updateTerminator();
+ // Cond.clear();
+ // TBB = FBB = nullptr;
+ // if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // // FIXME: This should never take place.
+ // TBB = FBB = nullptr;
+ // }
+ // }
+ if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
+ PrevBB->updateTerminator();
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond))
+ F->back().updateTerminator();
+
+ BlockWorkList.clear();
+ EHPadWorkList.clear();
+}
+
+void MachineBlockPlacement::optimizeBranches() {
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+
+ // Now that all the basic blocks in the chain have the proper layout,
+ // make a final call to AnalyzeBranch with AllowModify set.
+ // Indeed, the target may be able to optimize the branches in a way we
+ // cannot because all branches may not be analyzable.
+ // E.g., the target may be able to remove an unconditional branch to
+ // a fallthrough when it occurs after predicated terminators.
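+ // As a conceptual sketch (block names and mnemonics are made up), a layout
+ // such as
+ //   ChainBB:  beq %cond, ColdBB    ; TBB = ColdBB, probability 0.1
+ //             b   HotBB            ; FBB = HotBB,  probability 0.9
+ // is rewritten, when the condition can be reversed, into
+ //   ChainBB:  bne %cond, HotBB
+ //             b   ColdBB
+ // so that the more probable successor is branched to first.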
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ Cond.clear();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+ if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
+ // If ChainBB has a two-way branch, try to re-order the branches
+ // such that we branch to the successor with higher probability first.
+ if (TBB && !Cond.empty() && FBB &&
+ MBPI->getEdgeProbability(ChainBB, FBB) >
+ MBPI->getEdgeProbability(ChainBB, TBB) &&
+ !TII->ReverseBranchCondition(Cond)) {
+ DEBUG(dbgs() << "Reverse order of the two branches: "
+ << getBlockName(ChainBB) << "\n");
+ DEBUG(dbgs() << " Edge probability: "
+ << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
+ << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
+ DebugLoc dl; // FIXME: this is nowhere
+ TII->RemoveBranch(*ChainBB);
+ TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl);
+ ChainBB->updateTerminator();
+ }
+ }
+ }
+}
+
+void MachineBlockPlacement::alignBlocks() {
+ // Walk through the backedges of the function now that we have fully laid out
+ // the basic blocks and align the destination of each backedge. We don't rely
+ // exclusively on the loop info here so that we can align backedges in
+ // unnatural CFGs and backedges that were introduced purely because of the
+ // loop rotations done during this layout pass.
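+ // As a rough worked example of the checks below (numbers are made up): with
+ // an entry frequency of 1000, only blocks at least as hot as 200 (20% of
+ // entry) are considered; a loop block must also be at least 20% as hot as
+ // its loop header, and it is aligned either when its layout predecessor
+ // cannot fall through to it or when that edge carries at most 20% of the
+ // block's own frequency.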
+ if (F->getFunction()->optForSize())
+ return;
+ BlockChain &FunctionChain = *BlockToChain[&F->front()];
+ if (FunctionChain.begin() == FunctionChain.end())
+ return; // Empty chain.
+
+ const BranchProbability ColdProb(1, 5); // 20%
+ BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
+ BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+
+ // Don't align non-looping basic blocks. These are unlikely to execute
+ // enough times to matter in practice. Note that we'll still handle
+ // unnatural CFGs inside of a natural outer loop (the common case) and
+ // rotated loops.
+ MachineLoop *L = MLI->getLoopFor(ChainBB);
+ if (!L)
+ continue;
+
+ unsigned Align = TLI->getPrefLoopAlignment(L);
+ if (!Align)
+ continue; // Don't care about loop alignment.
+
+ // If the block is cold relative to the function entry don't waste space
+ // aligning it.
+ BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
+ if (Freq < WeightedEntryFreq)
+ continue;
+
+ // If the block is cold relative to its loop header, don't align it
+ // regardless of what edges into the block exist.
+ MachineBasicBlock *LoopHeader = L->getHeader();
+ BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
+ if (Freq < (LoopHeaderFreq * ColdProb))
+ continue;
+
+ // Check for the existence of a non-layout predecessor which would benefit
+ // from aligning this block.
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(ChainBB));
+
+ // Force alignment if all the predecessors are jumps. We already checked
+ // that the block isn't cold above.
+ if (!LayoutPred->isSuccessor(ChainBB)) {
+ ChainBB->setAlignment(Align);
+ continue;
+ }
+
+ // Align this block if the layout predecessor's edge into this block is
+ // cold relative to the block. When this is true, other predecessors make up
+ // all of the hot entries into the block and thus alignment is likely to be
+ // important.
+ BranchProbability LayoutProb =
+ MBPI->getEdgeProbability(LayoutPred, ChainBB);
+ BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
+ if (LayoutEdgeFreq <= (Freq * ColdProb))
+ ChainBB->setAlignment(Align);
+ }
+}
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ // Check for single-block functions and skip them.
+ if (std::next(MF.begin()) == MF.end())
+ return false;
+
+ F = &MF;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
+ getAnalysis<MachineBlockFrequencyInfo>());
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = MF.getSubtarget().getInstrInfo();
+ TLI = MF.getSubtarget().getTargetLowering();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ assert(BlockToChain.empty());
+
+ buildCFGChains();
+
+ // Changing the layout can create new tail merging opportunities.
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // TailMerge can create jumps into if-branches that make the CFG irreducible
+ // for HW that requires a structured CFG.
+ bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge() &&
+ BranchFoldPlacement;
+ // No tail merging opportunities if the number of blocks is less than four.
+ if (MF.size() > 3 && EnableTailMerge) {
+ BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
+ *MBPI);
+
+ if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
+ /*AfterBlockPlacement=*/true)) {
+ // Redo the layout if tail merging creates/removes/moves blocks.
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+ buildCFGChains();
+ }
+ }
+
+ optimizeBranches();
+ alignBlocks();
+
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+
+ if (AlignAllBlock)
+ // Align all of the blocks in the function to a specific alignment.
+ for (MachineBasicBlock &MBB : MF)
+ MBB.setAlignment(AlignAllBlock);
+ else if (AlignAllNonFallThruBlocks) {
+ // Align all of the blocks that have no fall-through predecessors to a
+ // specific alignment.
+ for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
+ auto LayoutPred = std::prev(MBI);
+ if (!LayoutPred->isSuccessor(&*MBI))
+ MBI->setAlignment(AlignAllNonFallThruBlocks);
+ }
+ }
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+namespace {
+/// \brief A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absence of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacementStats::ID = 0;
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (std::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
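+ // Every non-fallthrough successor edge counts as one taken branch below;
+ // e.g. (made-up numbers) a block with frequency 100 and two successors, one
+ // of which is the layout successor, adds 100 * P(other edge) to the
+ // conditional-branch taken frequency and bumps NumCondBranches by one.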
+ for (MachineBasicBlock &MBB : F) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ Statistic &NumBranches =
+ (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
+ Statistic &BranchTakenFreq =
+ (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ // Skip if this successor is a fallthrough.
+ if (MBB.isLayoutSuccessor(Succ))
+ continue;
+
+ BlockFrequency EdgeFreq =
+ BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..fe7340618374
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,93 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+cl::opt<unsigned>
+ StaticLikelyProb("static-likely-prob",
+ cl::desc("branch probability threshold in percentage"
+ "to be considered very likely"),
+ cl::init(80), cl::Hidden);
+
+cl::opt<unsigned> ProfileLikelyProb(
+ "profile-likely-prob",
+ cl::desc("branch probability threshold in percentage to be considered"
+ " very likely when profile is available"),
+ cl::init(51), cl::Hidden);
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+void MachineBranchProbabilityInfo::anchor() {}
+
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const {
+ return Src->getSuccProbability(Dst);
+}
+
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
+ // This is a linear search. Try to use the const_succ_iterator version when
+ // possible.
+ return getEdgeProbability(Src,
+ std::find(Src->succ_begin(), Src->succ_end(), Dst));
+}
+
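+/// An edge is considered hot when its probability exceeds StaticLikelyProb
+/// percent (80 by default above); for example, an edge taken with probability
+/// 0.9 is reported as hot, while one taken with probability 0.75 is not.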
+bool MachineBranchProbabilityInfo::isEdgeHot(
+ const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
+ BranchProbability HotProb(StaticLikelyProb, 100);
+ return getEdgeProbability(Src, Dst) > HotProb;
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ auto MaxProb = BranchProbability::getZero();
+ MachineBasicBlock *MaxSucc = nullptr;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ auto Prob = getEdgeProbability(MBB, I);
+ if (Prob > MaxProb) {
+ MaxProb = Prob;
+ MaxSucc = *I;
+ }
+ }
+
+ BranchProbability HotProb(StaticLikelyProb, 100);
+ if (getEdgeProbability(MBB, MaxSucc) >= HotProb)
+ return MaxSucc;
+
+ return nullptr;
+}
+
+raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
+ raw_ostream &OS, const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 000000000000..1209f73d9601
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,716 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-cse"
+
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ AliasAnalysis *AA;
+ MachineDominatorTree *DT;
+ MachineRegisterInfo *MRI;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ void releaseMemory() override {
+ ScopeMap.clear();
+ Exps.clear();
+ }
+
+ private:
+ unsigned LookAheadLimit;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
+ DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
+ ScopedHTType VNT;
+ SmallVector<MachineInstr*, 64> Exps;
+ unsigned CurrVN;
+
+ bool PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB);
+ bool isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVectorImpl<unsigned> &PhysDefs,
+ bool &PhysUseDef) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVectorImpl<unsigned> &PhysDefs,
+ bool &NonLocal) const;
+ bool isCSECandidate(MachineInstr *MI);
+ bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI);
+ void EnterScope(MachineBasicBlock *MBB);
+ void ExitScope(MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineBasicBlock *MBB);
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
+ bool PerformCSE(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+char &llvm::MachineCSEID = MachineCSE::ID;
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+
+/// The source register of a COPY machine instruction can be propagated to all
+/// its users, and this propagation could increase the probability of finding
+/// common subexpressions. If the COPY has only one user, the COPY itself can
+/// be removed.
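+///
+/// As a sketch in pseudo machine IR (register numbers are made up):
+///   %1 = COPY %0
+///   %2 = ADD %1, %3
+/// becomes
+///   %2 = ADD %0, %3
+/// and the COPY is erased, since %1 had a single non-debug use.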
+bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI->isCopy())
+ continue;
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg())
+ continue;
+ // FIXME: We should trivially coalesce subregister copies to expose CSE
+ // opportunities on instructions with truncated operands (see
+ // cse-add-with-overflow.ll). This can be done here as follows:
+ // if (SrcSubReg)
+ // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
+ // SrcSubReg);
+ // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
+ //
+ // The 2-addr pass has been updated to handle coalesced subregs. However,
+ // some machine-specific code still can't handle it.
+ // To handle it properly we also need a way to find a constrained subregister
+ // class given a super-reg class and subreg index.
+ if (DefMI->getOperand(1).getSubReg())
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (!MRI->constrainRegClass(SrcReg, RC))
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ // Propagate SrcReg of copies to MI.
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ // Coalesce single use copies.
+ if (OnlyOneUse) {
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ }
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ // Reached end of block, register is obviously dead.
+ return true;
+
+ bool SeenDef = false;
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (!TRI->regsOverlap(MO.getReg(), Reg))
+ continue;
+ if (MO.isUse())
+ // Found a use!
+ return false;
+ SeenDef = true;
+ }
+ if (SeenDef)
+ // Saw a def of Reg (or an alias) before encountering any use; it's
+ // trivially dead.
+ return true;
+
+ --LookAheadLeft;
+ ++I;
+ }
+ return false;
+}
+
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
+/// writes physical registers (except for dead defs of physical registers). It
+/// also returns the physical register def by reference if it's the only one
+/// and the instruction does not use a physical register.
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVectorImpl<unsigned> &PhysDefs,
+ bool &PhysUseDef) const {
+ // First, add all uses to PhysRefs.
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Reading constant physregs is ok.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+ }
+
+ // Next, collect all defs into PhysDefs. If any is already in PhysRefs
+ // (which currently contains only uses), set the PhysUseDef flag.
+ PhysUseDef = false;
+ MachineBasicBlock::const_iterator I = MI; I = std::next(I);
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // Check against PhysRefs even if the def is "dead".
+ if (PhysRefs.count(Reg))
+ PhysUseDef = true;
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
+ PhysDefs.push_back(Reg);
+ }
+
+ // Finally, add all defs to PhysRefs as well.
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
+ for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ PhysRefs.insert(*AI);
+
+ return !PhysRefs.empty();
+}
+
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVectorImpl<unsigned> &PhysDefs,
+ bool &NonLocal) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+ // Avoid extending the live range of physical registers if they are
+ // allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
+ MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I != EE && I->isDebugValue())
+ ++I;
+
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
+ if (I == E)
+ return true;
+
+ for (const MachineOperand &MO : I->operands()) {
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
+bool MachineCSE::isCSECandidate(MachineInstr *MI) {
+ if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
+ MI->isInlineAsm() || MI->isDebugValue())
+ return false;
+
+ // Ignore copies.
+ if (MI->isCopyLike())
+ return false;
+
+ // Ignore stuff that we obviously can't move.
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ if (MI->mayLoad()) {
+ // Okay, this instruction does a load. As a refinement, we allow the target
+ // to decide whether the loaded value is actually a constant. If so, we can
+ // actually use it as a load.
+ if (!MI->isInvariantLoad(AA))
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
+ return false;
+ }
+
+ // Ignore stack guard loads, otherwise the register that holds the CSEed
+ // value may be spilled and get loaded back with corrupted data.
+ if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
+ return false;
+
+ return true;
+}
+
+/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
+/// common expression that defines Reg.
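+/// A sketch of heuristic #1 below: a computation that is as cheap as a move
+/// (e.g. an immediate add), whose existing copy lives in a block that neither
+/// contains MI nor is an immediate predecessor of MI's block, is left alone,
+/// since reusing it would mainly extend a live range.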
+bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ MachineInstr *CSMI, MachineInstr *MI) {
+ // FIXME: Heuristics that work around the lack of live range splitting.
+
+ // If CSReg is used at all uses of Reg, CSE should not increase register
+ // pressure of CSReg.
+ bool MayIncreasePressure = true;
+ if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
+ TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MayIncreasePressure = false;
+ SmallPtrSet<MachineInstr*, 8> CSUses;
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
+ CSUses.insert(&MI);
+ }
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ if (!CSUses.count(&MI)) {
+ MayIncreasePressure = true;
+ break;
+ }
+ }
+ }
+ if (!MayIncreasePressure) return true;
+
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computation to be spilled.
+ if (TII->isAsCheapAsAMove(*MI)) {
+ MachineBasicBlock *CSBB = CSMI->getParent();
+ MachineBasicBlock *BB = MI->getParent();
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
+ return false;
+ }
+
+ // Heuristics #2: If the expression doesn't use a virtual register and the
+ // only uses of the redundant computation are copies, do not CSE.
+ bool HasVRegUse = false;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.isUse() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ HasVRegUse = true;
+ break;
+ }
+ }
+ if (!HasVRegUse) {
+ bool HasNonCopyUse = false;
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+ // Ignore copies.
+ if (!MI.isCopyLike()) {
+ HasNonCopyUse = true;
+ break;
+ }
+ }
+ if (!HasNonCopyUse)
+ return false;
+ }
+
+ // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+ // it unless the defined value is already used in the BB of the new use.
+ bool HasPHI = false;
+ SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
+ HasPHI |= MI.isPHI();
+ CSBBs.insert(MI.getParent());
+ }
+
+ if (!HasPHI)
+ return true;
+ return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+ ScopeType *Scope = new ScopeType(VNT);
+ ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+ assert(SI != ScopeMap.end());
+ delete SI->second;
+ ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+ SmallVector<unsigned, 2> ImplicitDefsToUpdate;
+ SmallVector<unsigned, 2> ImplicitDefs;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isCSECandidate(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Using trivial copy propagation to find more CSE opportunities.
+ if (PerformTrivialCopyPropagation(MI, MBB)) {
+ Changed = true;
+
+ // After coalescing MI itself may become a copy.
+ if (MI->isCopyLike())
+ continue;
+
+ // Try again to see if CSE is possible.
+ FoundCSE = VNT.count(MI);
+ }
+ }
+
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->isCommutable()) {
+ if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ Changed = true;
+ } else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(*MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
+ // used, then it's not safe to replace it with a common subexpression.
+ // It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
+ SmallSet<unsigned, 8> PhysRefs;
+ SmallVector<unsigned, 2> PhysDefs;
+ bool PhysUseDef = false;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
+ PhysDefs, PhysUseDef)) {
+ FoundCSE = false;
+
+ // ... Unless the CS is local or is in the sole predecessor block,
+ // and it also defines the physical register, which is not clobbered
+ // in between, and the physical register uses were not clobbered either.
+ // This can never be the case if the instruction both uses and
+ // defines the same physical register, which was detected above.
+ if (!PhysUseDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ FoundCSE = true;
+ }
+ }
+
+ if (!FoundCSE) {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ continue;
+ }
+
+ // Found a common subexpression, eliminate it.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ DEBUG(dbgs() << "Examining: " << *MI);
+ DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
+
+ // Check if it's profitable to perform this CSE.
+ bool DoCSE = true;
+ unsigned NumDefs = MI->getDesc().getNumDefs() +
+ MI->getDesc().getNumImplicitDefs();
+
+ for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned OldReg = MO.getReg();
+ unsigned NewReg = CSMI->getOperand(i).getReg();
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
+ ImplicitDefsToUpdate.push_back(i);
+
+ // Keep track of implicit defs of CSMI and MI, to clear possibly
+ // made-redundant kill flags.
+ if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
+ ImplicitDefs.push_back(OldReg);
+
+ if (OldReg == NewReg) {
+ --NumDefs;
+ continue;
+ }
+
+ assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
+ TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ "Do not CSE physical register defs!");
+
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ // Don't perform CSE if the result of the old instruction cannot exist
+ // within the register class of the new instruction.
+ const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
+ if (!MRI->constrainRegClass(NewReg, OldRC)) {
+ DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
+ DoCSE = false;
+ break;
+ }
+
+ CSEPairs.push_back(std::make_pair(OldReg, NewReg));
+ --NumDefs;
+ }
+
+ // Actually perform the elimination.
+ if (DoCSE) {
+ for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
+ unsigned OldReg = CSEPair.first;
+ unsigned NewReg = CSEPair.second;
+ // OldReg may have been unused but is used now, clear the Dead flag
+ MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
+ assert(Def != nullptr && "CSEd register has no unique definition?");
+ Def->clearRegisterDeads(NewReg);
+ // Replace with NewReg and clear kill flags which may be wrong now.
+ MRI->replaceRegWith(OldReg, NewReg);
+ MRI->clearKillFlags(NewReg);
+ }
+
+ // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
+ // we should make sure it is not dead at CSMI.
+ for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
+ CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
+
+ // Go through implicit defs of CSMI and MI, and clear the kill flags on
+ // their uses in all the instructions between CSMI and MI.
+ // We might have made some of the kill flags redundant, consider:
+ // subs ... %NZCV<imp-def> <- CSMI
+ // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore
+ // subs ... %NZCV<imp-def> <- MI, to be eliminated
+ // csinc ... %NZCV<imp-use,kill>
+ // Since we eliminated MI, and reused a register imp-def'd by CSMI
+ // (here %NZCV), that register, if it was killed before MI, should have
+ // that kill flag removed, because its lifetime was extended.
+ if (CSMI->getParent() == MI->getParent()) {
+ for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+ for (auto ImplicitDef : ImplicitDefs)
+ if (MachineOperand *MO = II->findRegisterUseOperand(
+ ImplicitDef, /*isKill=*/true, TRI))
+ MO->setIsKill(false);
+ } else {
+ // If the instructions aren't in the same BB, bail out and clear the
+ // kill flag on all uses of the imp-def'd register.
+ for (auto ImplicitDef : ImplicitDefs)
+ MRI->clearKillFlags(ImplicitDef);
+ }
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEs;
+ if (!PhysRefs.empty())
+ ++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
+ Changed = true;
+ } else {
+ VNT.insert(MI, CurrVN++);
+ Exps.push_back(MI);
+ }
+ CSEPairs.clear();
+ ImplicitDefsToUpdate.clear();
+ ImplicitDefs.clear();
+ }
+
+ return Changed;
+}
+
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
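+/// For example, in a dominator tree A -> {B, C} (a hypothetical shape), B's
+/// scope is popped as soon as B has been processed, but A's scope stays open
+/// until C is done too, because OpenChildren[A] only then drops to zero.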
+void
+MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offspring are all done.
+ while (MachineDomTreeNode *Parent = Node->getIDom()) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ CurrVN = 0;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(Node);
+ do {
+ Node = WorkList.pop_back_val();
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ OpenChildren[Node] = Children.size();
+ for (MachineDomTreeNode *Child : Children)
+ WorkList.push_back(Child);
+ } while (!WorkList.empty());
+
+ // Now perform CSE.
+ bool Changed = false;
+ for (MachineDomTreeNode *Node : Scopes) {
+ MachineBasicBlock *MBB = Node->getBlock();
+ EnterScope(MBB);
+ Changed |= ProcessBlock(MBB);
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren);
+ }
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LookAheadLimit = TII->getMachineCSELookAheadLimit();
+ return PerformCSE(DT->getRootNode());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
new file mode 100644
index 000000000000..6b5c6ba82506
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -0,0 +1,477 @@
+//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The machine combiner pass uses machine trace metrics to ensure the combined
+// instructions do not lengthen the critical path or the resource depth.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-combiner"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumInstCombined, "Number of machineinst combined");
+
+namespace {
+class MachineCombiner : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MCSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
+ MachineTraceMetrics *Traces;
+ MachineTraceMetrics::Ensemble *MinInstr;
+
+ TargetSchedModel TSchedModel;
+
+ /// True if optimizing for code size.
+ bool OptSize;
+
+public:
+ static char ID;
+ MachineCombiner() : MachineFunctionPass(ID) {
+ initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override { return "Machine InstCombiner"; }
+
+private:
+ bool doSubstitute(unsigned NewSize, unsigned OldSize);
+ bool combineInstructions(MachineBasicBlock *);
+ MachineInstr *getOperandDef(const MachineOperand &MO);
+ unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace);
+ unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace);
+ bool
+ improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineCombinerPattern Pattern);
+ bool preservesResourceLen(MachineBasicBlock *MBB,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs);
+ void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
+};
+}
+
+char MachineCombiner::ID = 0;
+char &llvm::MachineCombinerID = MachineCombiner::ID;
+
+INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
+ "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
+ false, false)
+
+void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineTraceMetrics>();
+ AU.addPreserved<MachineTraceMetrics>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
+ MachineInstr *DefInstr = nullptr;
+ // We need a virtual register definition.
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ DefInstr = MRI->getUniqueVRegDef(MO.getReg());
+ // PHI's have no depth etc.
+ if (DefInstr && DefInstr->isPHI())
+ DefInstr = nullptr;
+ return DefInstr;
+}
+
+/// Computes depth of instructions in vector \p InsInstrs.
+///
+/// \param InsInstrs is a vector of machine instructions
+/// \param InstrIdxForVirtReg is a dense map of virtual register to index
+/// of defining machine instruction in \p InsInstrs
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Depth of last instruction in \p InsInstrs ("NewRoot")
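+///
+/// For example (illustrative numbers only): if NewRoot reads one operand
+/// defined at depth 3 in the trace with an operand latency of 2, and another
+/// defined at depth 4 with latency 1, its depth is max(3 + 2, 4 + 1) = 5.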
+unsigned
+MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineTraceMetrics::Trace BlockTrace) {
+ SmallVector<unsigned, 16> InstrDepth;
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
+
+ // For each instruction in the new sequence compute the depth based on the
+ // operands. Use the trace information when possible. For new operands, which
+ // are tracked in the InstrIdxForVirtReg map, the depth is looked up in
+ // InstrDepth.
+ for (auto *InstrPtr : InsInstrs) { // for each Use
+ unsigned IDepth = 0;
+ DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
+ for (const MachineOperand &MO : InstrPtr->operands()) {
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isUse())
+ continue;
+ unsigned DepthOp = 0;
+ unsigned LatencyOp = 0;
+ DenseMap<unsigned, unsigned>::iterator II =
+ InstrIdxForVirtReg.find(MO.getReg());
+ if (II != InstrIdxForVirtReg.end()) {
+ // Operand is new virtual register not in trace
+ assert(II->second < InstrDepth.size() && "Bad Index");
+ MachineInstr *DefInstr = InsInstrs[II->second];
+ assert(DefInstr &&
+ "There must be a definition for a new virtual register");
+ DepthOp = InstrDepth[II->second];
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ MachineInstr *DefInstr = getOperandDef(MO);
+ if (DefInstr) {
+ DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
+ LatencyOp = TSchedModel.computeOperandLatency(
+ DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()),
+ InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg()));
+ }
+ }
+ IDepth = std::max(IDepth, DepthOp + LatencyOp);
+ }
+ InstrDepth.push_back(IDepth);
+ }
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ return InstrDepth[NewRootIdx];
+}
+
+/// Computes instruction latency as max of latency of defined operands.
+///
+/// \param Root is a machine instruction that could be replaced by NewRoot.
+/// It is used to compute a more accurate latency information for NewRoot in
+/// case there is a dependent instruction in the same trace (\p BlockTrace)
+/// \param NewRoot is the instruction for which the latency is computed
+/// \param BlockTrace is a trace of machine instructions
+///
+/// \returns Latency of \p NewRoot
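+///
+/// For example (made-up latencies): if NewRoot defines two registers whose
+/// first in-trace users see operand latencies of 4 and 2, the reported
+/// latency is max(4, 2) = 4; a definition with no dependent instruction in
+/// the trace falls back to the plain instruction latency instead.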
+unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
+ MachineTraceMetrics::Trace BlockTrace) {
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
+
+ // Check each definition in NewRoot and compute the latency
+ unsigned NewRootLatency = 0;
+
+ for (const MachineOperand &MO : NewRoot->operands()) {
+ // Check for virtual register operand.
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ continue;
+ if (!MO.isDef())
+ continue;
+ // Get the first instruction that uses MO
+ MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
+ RI++;
+ MachineInstr *UseMO = RI->getParent();
+ unsigned LatencyOp = 0;
+ if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) {
+ LatencyOp = TSchedModel.computeOperandLatency(
+ NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
+ UseMO->findRegisterUseOperandIdx(MO.getReg()));
+ } else {
+ LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
+ }
+ NewRootLatency = std::max(NewRootLatency, LatencyOp);
+ }
+ return NewRootLatency;
+}
+
+/// The combiner's goal may differ based on which pattern it is attempting
+/// to optimize.
+enum class CombinerObjective {
+ MustReduceDepth, // The data dependency chain must be improved.
+ Default // The critical path must not be lengthened.
+};
+
+static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+ // TODO: If C++ ever gets a real enum class, make this part of the
+ // MachineCombinerPattern class.
+ switch (P) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ return CombinerObjective::MustReduceDepth;
+ default:
+ return CombinerObjective::Default;
+ }
+}
+
+/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
+/// The new code sequence ends in MI NewRoot. A necessary condition for the new
+/// sequence to replace the old sequence is that it cannot lengthen the critical
+/// path. The definition of "improve" may be restricted by specifying that the
+/// new path improves the data dependency chain (MustReduceDepth).
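+///
+/// For instance (made-up cycle counts): with NewRootDepth = 6 and
+/// NewRootLatency = 3 the new sequence costs 9 cycles, while RootDepth = 5,
+/// RootLatency = 3 and RootSlack = 2 price the old one at 10 cycles; the
+/// transform is then allowed under the Default objective (9 <= 10) but
+/// rejected under MustReduceDepth (6 is not less than 5).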
+bool MachineCombiner::improvesCriticalPathLen(
+ MachineBasicBlock *MBB, MachineInstr *Root,
+ MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+ MachineCombinerPattern Pattern) {
+ assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+ "Missing machine model\n");
+ // NewRoot is the last instruction in the \p InsInstrs vector.
+ unsigned NewRootIdx = InsInstrs.size() - 1;
+ MachineInstr *NewRoot = InsInstrs[NewRootIdx];
+
+ // Get depth and latency of NewRoot and Root.
+ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+ unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
+
+ DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+ dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
+ dbgs() << " RootDepth: " << RootDepth << "\n");
+
+ // For a transform such as reassociation, the cost equation is
+ // conservatively calculated so that we must improve the depth (data
+ // dependency cycles) in the critical path to proceed with the transform.
+ // Being conservative also protects against inaccuracies in the underlying
+ // machine trace metrics and CPU models.
+ if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
+ return NewRootDepth < RootDepth;
+
+ // A more flexible cost calculation for the critical path includes the slack
+ // of the original code sequence. This may allow the transform to proceed
+ // even if the instruction depths (data dependency cycles) become worse.
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
+ unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
+ unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
+
+ DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
+ dbgs() << " RootLatency: " << RootLatency << "\n";
+ dbgs() << " RootSlack: " << RootSlack << "\n";
+ dbgs() << " NewRootDepth + NewRootLatency = "
+ << NewRootDepth + NewRootLatency << "\n";
+ dbgs() << " RootDepth + RootLatency + RootSlack = "
+ << RootDepth + RootLatency + RootSlack << "\n";);
+
+ unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+ unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+
+ return NewCycleCount <= OldCycleCount;
+}
+
+/// Helper routine to convert instructions into their scheduling classes (SC).
+void MachineCombiner::instr2instrSC(
+ SmallVectorImpl<MachineInstr *> &Instrs,
+ SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
+ for (auto *InstrPtr : Instrs) {
+ unsigned Opc = InstrPtr->getOpcode();
+ unsigned Idx = TII->get(Opc).getSchedClass();
+ const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
+ InstrsSC.push_back(SC);
+ }
+}
+
+/// True when the new instructions do not increase resource length
+bool MachineCombiner::preservesResourceLen(
+ MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs) {
+ if (!TSchedModel.hasInstrSchedModel())
+ return true;
+
+ // Compute current resource length
+
+ //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+ SmallVector <const MachineBasicBlock *, 1> MBBarr;
+ MBBarr.push_back(MBB);
+ unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
+
+ // Deal with SC rather than Instructions.
+ SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC;
+ SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC;
+
+ instr2instrSC(InsInstrs, InsInstrsSC);
+ instr2instrSC(DelInstrs, DelInstrsSC);
+
+ ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
+ ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
+
+ // Compute new resource length.
+ unsigned ResLenAfterCombine =
+ BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
+
+ DEBUG(dbgs() << "RESOURCE DATA: \n";
+ dbgs() << " resource len before: " << ResLenBeforeCombine
+ << " after: " << ResLenAfterCombine << "\n";);
+
+ return ResLenAfterCombine <= ResLenBeforeCombine;
+}
+
+/// \returns true when the new instruction sequence should be generated
+/// regardless of whether it lengthens the critical path or not.
+bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
+ if (OptSize && (NewSize < OldSize))
+ return true;
+ if (!TSchedModel.hasInstrSchedModelOrItineraries())
+ return true;
+ return false;
+}
+
+/// Substitute a slow code sequence with a faster one by
+/// evaluating an instruction combining pattern.
+/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction
+/// combining based on machine trace metrics. Only combine a sequence of
+/// instructions when this neither lengthens the critical path nor increases
+/// resource pressure. When optimizing for code size, always combine when the
+/// new sequence is shorter.
+bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
+
+ auto BlockIter = MBB->begin();
+ // Check if the block is in a loop.
+ const MachineLoop *ML = MLI->getLoopFor(MBB);
+
+ while (BlockIter != MBB->end()) {
+ auto &MI = *BlockIter++;
+
+ DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
+ SmallVector<MachineCombinerPattern, 16> Patterns;
+ // The motivating example is:
+ //
+ // MUL Other MUL_op1 MUL_op2 Other
+ // \ / \ | /
+ // ADD/SUB => MADD/MSUB
+ // (=Root) (=NewRoot)
+
+ // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
+ // usually beneficial for code size it unfortunately can hurt performance
+ // when the ADD is on the critical path, but the MUL is not. With the
+ // substitution the MUL becomes part of the critical path (in form of the
+ // MADD) and can lengthen it on architectures where the MADD latency is
+ // longer than the ADD latency.
+ //
+ // For each instruction we check if it can be the root of a combiner
+ // pattern. Then for each pattern the new code sequence in form of MI is
+ // generated and evaluated. When the efficiency criteria (don't lengthen
+    // critical path, don't use more resources) are met, the new sequence gets
+ // hooked up into the basic block before the old sequence is removed.
+ //
+ // The algorithm does not try to evaluate all patterns and pick the best.
+ // This is only an artificial restriction though. In practice there is
+ // mostly one pattern, and getMachineCombinerPatterns() can order patterns
+ // based on an internal cost heuristic.
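+    //
+    // Illustrative latencies only: if ADD takes 1 cycle and MADD takes 4, and
+    // only the ADD sits on the critical path, folding MUL + ADD into MADD
+    // stretches that path by 3 cycles and is rejected; if the MUL already
+    // feeds the critical path, the fold removes an instruction without
+    // lengthening the path and is accepted.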
+
+ if (!TII->getMachineCombinerPatterns(MI, Patterns))
+ continue;
+
+ for (auto P : Patterns) {
+ SmallVector<MachineInstr *, 16> InsInstrs;
+ SmallVector<MachineInstr *, 16> DelInstrs;
+ DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+ if (!MinInstr)
+ MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ Traces->verifyAnalysis();
+ TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ unsigned NewInstCount = InsInstrs.size();
+ unsigned OldInstCount = DelInstrs.size();
+ // Found pattern, but did not generate alternative sequence.
+ // This can happen e.g. when an immediate could not be materialized
+ // in a single instruction.
+ if (!NewInstCount)
+ continue;
+
+ bool SubstituteAlways = false;
+ if (ML && TII->isThroughputPattern(P))
+ SubstituteAlways = true;
+
+ // Substitute when we optimize for codesize and the new sequence has
+ // fewer instructions OR
+ // the new sequence neither lengthens the critical path nor increases
+ // resource pressure.
+ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
+ (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+ InstrIdxForVirtReg, P) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+ Changed = true;
+ ++NumInstCombined;
+
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+ // Eagerly stop after the first pattern fires.
+ break;
+ } else {
+ // Cleanup instructions of the alternative code sequence. There is no
+ // use for them.
+ MachineFunction *MF = MBB->getParent();
+ for (auto *InstrPtr : InsInstrs)
+ MF->DeleteMachineInstr(InstrPtr);
+ }
+ InstrIdxForVirtReg.clear();
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ SchedModel = STI.getSchedModel();
+ TSchedModel.init(SchedModel, &STI, TII);
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ Traces = &getAnalysis<MachineTraceMetrics>();
+ MinInstr = nullptr;
+ OptSize = MF.getFunction()->optForSize();
+
+ DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
+ if (!TII->useMachineCombiner()) {
+ DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n");
+ return false;
+ }
+
+ bool Changed = false;
+
+ // Try to combine instructions.
+ for (auto &MBB : MF)
+ Changed |= combineInstructions(&MBB);
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 000000000000..8fdf39d54bd0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,371 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "codegen-cp"
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ typedef SmallVector<unsigned, 4> RegList;
+ typedef DenseMap<unsigned, RegList> SourceMap;
+ typedef DenseMap<unsigned, MachineInstr*> Reg2MIMap;
+
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ private:
+ void ClobberRegister(unsigned Reg);
+ void CopyPropagateBlock(MachineBasicBlock &MBB);
+ bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+
+ /// Candidates for deletion.
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;
+ /// Def -> available copies map.
+ Reg2MIMap AvailCopyMap;
+ /// Def -> copies map.
+ Reg2MIMap CopyMap;
+ /// Src -> Def map
+ SourceMap SrcMap;
+ bool Changed;
+ };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+/// Remove any entry in \p Map where the register is a subregister or equal to
+/// a register contained in \p Regs.
+static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned Reg : Regs) {
+ // Source of copy is no longer available for propagation.
+ for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR)
+ Map.erase(*SR);
+ }
+}
+
+/// Remove any entry in \p Map that is marked clobbered in \p RegMask.
+/// The map will typically have a lot fewer entries than the regmask clobbers,
+/// so this is more efficient than iterating the clobbered registers and calling
+/// ClobberRegister() on them.
+static void removeClobberedRegsFromMap(Reg2MIMap &Map,
+ const MachineOperand &RegMask) {
+ for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E;
+ I = Next) {
+ Next = std::next(I);
+ unsigned Reg = I->first;
+ if (RegMask.clobbersPhysReg(Reg))
+ Map.erase(I);
+ }
+}
+
+void MachineCopyPropagation::ClobberRegister(unsigned Reg) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ CopyMap.erase(*AI);
+ AvailCopyMap.erase(*AI);
+
+ SourceMap::iterator SI = SrcMap.find(*AI);
+ if (SI != SrcMap.end()) {
+ removeRegsFromMap(AvailCopyMap, SI->second, *TRI);
+ SrcMap.erase(SI);
+ }
+ }
+}
+
+/// Return true if \p PreviousCopy did copy register \p Src to register \p Def.
+/// This fact may have been obscured by sub register usage or may not be true at
+/// all even though Src and Def are subregisters of the registers used in
+/// PreviousCopy. e.g.
+/// isNopCopy("ecx = COPY eax", AX, CX) == true
+/// isNopCopy("ecx = COPY eax", AH, CL) == false
+static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
+ unsigned Def, const TargetRegisterInfo *TRI) {
+ unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg();
+ unsigned PreviousDef = PreviousCopy.getOperand(0).getReg();
+ if (Src == PreviousSrc) {
+ assert(Def == PreviousDef);
+ return true;
+ }
+ if (!TRI->isSubRegister(PreviousSrc, Src))
+ return false;
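+  // Src is a strict subregister of PreviousSrc; the earlier copy covers this
+  // one only if Def occupies the same subregister slot inside PreviousDef
+  // (for instance, AX inside EAX and CX inside ECX on x86).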
+ unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src);
+ return SubIdx == TRI->getSubRegIndex(PreviousDef, Def);
+}
+
+/// Remove instruction \p Copy if there exists a previous copy that copies the
+/// register \p Src to the register \p Def; this may happen indirectly by
+/// copying the super registers.
+bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
+ unsigned Def) {
+  // Avoid eliminating a copy from/to reserved registers as we cannot predict
+ // the value (Example: The sparc zero register is writable but stays zero).
+ if (MRI->isReserved(Src) || MRI->isReserved(Def))
+ return false;
+
+ // Search for an existing copy.
+ Reg2MIMap::iterator CI = AvailCopyMap.find(Def);
+ if (CI == AvailCopyMap.end())
+ return false;
+
+ // Check that the existing copy uses the correct sub registers.
+ MachineInstr &PrevCopy = *CI->second;
+ if (!isNopCopy(PrevCopy, Src, Def, TRI))
+ return false;
+
+ DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
+
+ // Copy was redundantly redefining either Src or Def. Remove earlier kill
+ // flags between Copy and PrevCopy because the value will be reused now.
+ assert(Copy.isCopy());
+ unsigned CopyDef = Copy.getOperand(0).getReg();
+ assert(CopyDef == Src || CopyDef == Def);
+ for (MachineInstr &MI :
+ make_range(PrevCopy.getIterator(), Copy.getIterator()))
+ MI.clearRegisterKills(CopyDef, TRI);
+
+ Copy.eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ return true;
+}
+
+void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ assert(!TargetRegisterInfo::isVirtualRegister(Def) &&
+ !TargetRegisterInfo::isVirtualRegister(Src) &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+      // If the two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // or
+ //
+ // %ECX<def> = COPY %EAX
+ // ... nothing clobbered EAX.
+ // %ECX<def> = COPY %EAX
+ // =>
+ // %ECX<def> = COPY %EAX
+ if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
+ continue;
+
+ // If Src is defined by a previous copy, the previous copy cannot be
+ // eliminated.
+ for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
+ Reg2MIMap::iterator CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end()) {
+ DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump());
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+
+ // Copy is now a candidate for deletion.
+ if (!MRI->isReserved(Def))
+ MaybeDeadCopies.insert(MI);
+
+      // If 'Def' was previously the source of another copy, then that earlier
+      // copy's source is no longer available. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ ClobberRegister(Def);
+
+ // Remember Def is defined by the copy.
+ for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid();
+ ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+      // Remember the source that's copied to Def. Once it's clobbered, it's
+      // no longer available for copy propagation.
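+      // For example, after "%ECX<def> = COPY %EAX", SrcMap[EAX] records ECX;
+      // when EAX is later clobbered, ClobberRegister(EAX) drops the ECX entry
+      // from AvailCopyMap via removeRegsFromMap().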
+ RegList &DestList = SrcMap[Src];
+ if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end())
+ DestList.push_back(Def);
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ const MachineOperand *RegMask = nullptr;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isRegMask())
+ RegMask = &MO;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ Reg2MIMap::iterator CI = CopyMap.find(*AI);
+ if (CI != CopyMap.end()) {
+ DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+      // Treat undef uses like defs for copy propagation, but not for dead
+      // copy elimination: we would need to do a liveness check to be sure
+      // the copy is dead for undef uses.
+      // Backends are allowed to do whatever they want with undef values,
+      // and we cannot be sure this register will not be rewritten to break
+      // some false dependencies for the hardware, for instance.
+ if (MO.isUndef())
+ Defs.push_back(Reg);
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. Treat clobbered registers the same way as
+ // defined registers.
+ if (RegMask) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ for (SmallSetVector<MachineInstr *, 8>::iterator DI =
+ MaybeDeadCopies.begin();
+ DI != MaybeDeadCopies.end();) {
+ MachineInstr *MaybeDead = *DI;
+ unsigned Reg = MaybeDead->getOperand(0).getReg();
+ assert(!MRI->isReserved(Reg));
+
+ if (!RegMask->clobbersPhysReg(Reg)) {
+ ++DI;
+ continue;
+ }
+
+ DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
+ MaybeDead->dump());
+
+ // erase() will return the next valid iterator pointing to the next
+ // element after the erased one.
+ DI = MaybeDeadCopies.erase(DI);
+ MaybeDead->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+
+ removeClobberedRegsFromMap(AvailCopyMap, *RegMask);
+ removeClobberedRegsFromMap(CopyMap, *RegMask);
+ for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next;
+ I != E; I = Next) {
+ Next = std::next(I);
+ if (RegMask->clobbersPhysReg(I->first)) {
+ removeRegsFromMap(AvailCopyMap, I->second, *TRI);
+ SrcMap.erase(I);
+ }
+ }
+ }
+
+    // Any previous copy that defines or reads one of the Defs is no longer available.
+ for (unsigned Reg : Defs)
+ ClobberRegister(Reg);
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+  // If MBB does have successors, then conservatively assume the defs are live-out
+ // since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (MachineInstr *MaybeDead : MaybeDeadCopies) {
+ assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+ MaybeDead->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ Changed = false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+
+ for (MachineBasicBlock &MBB : MF)
+ CopyPropagateBlock(MBB);
+
+ return Changed;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
new file mode 100644
index 000000000000..acb7c4810b16
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -0,0 +1,54 @@
+//===- MachineDominanceFrontier.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/Analysis/DominanceFrontierImpl.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+template class DominanceFrontierBase<MachineBasicBlock>;
+template class ForwardDominanceFrontierBase<MachineBasicBlock>;
+}
+
+
+char MachineDominanceFrontier::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier",
+ "Machine Dominance Frontier Construction", true, true)
+
+MachineDominanceFrontier::MachineDominanceFrontier()
+ : MachineFunctionPass(ID),
+ Base() {
+ initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry());
+}
+
+char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID;
+
+bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Base.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineDominanceFrontier::releaseMemory() {
+ Base.releaseMemory();
+}
+
+void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
new file mode 100644
index 000000000000..303a6a9263be
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp
@@ -0,0 +1,157 @@
+//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// forward dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// Always verify dominfo if expensive checking is enabled.
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyMachineDomInfo = true;
+#else
+static bool VerifyMachineDomInfo = false;
+#endif
+static cl::opt<bool, true> VerifyMachineDomInfoX(
+ "verify-machine-dom-info", cl::location(VerifyMachineDomInfo),
+ cl::desc("Verify machine dominator info (time consuming)"));
+
+namespace llvm {
+template class DomTreeNodeBase<MachineBasicBlock>;
+template class DominatorTreeBase<MachineBasicBlock>;
+}
+
+char MachineDominatorTree::ID = 0;
+
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true)
+
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
+
+void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ CriticalEdgesToSplit.clear();
+ NewBBs.clear();
+ DT->recalculate(F);
+
+ return false;
+}
+
+MachineDominatorTree::MachineDominatorTree()
+ : MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ DT = new DominatorTreeBase<MachineBasicBlock>(false);
+}
+
+MachineDominatorTree::~MachineDominatorTree() {
+ delete DT;
+}
+
+void MachineDominatorTree::releaseMemory() {
+ DT->releaseMemory();
+}
+
+void MachineDominatorTree::verifyAnalysis() const {
+ if (VerifyMachineDomInfo)
+ verifyDomTree();
+}
+
+void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
+ DT->print(OS);
+}
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+ // Bail out early if there is nothing to do.
+ if (CriticalEdgesToSplit.empty())
+ return;
+
+  // For each element in CriticalEdgesToSplit, remember whether or not the
+  // element is the new immediate dominator of its successor. The mapping is
+  // done by index, i.e., the information for the ith element of
+  // CriticalEdgesToSplit is the ith element of IsNewIDom.
+ SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+ size_t Idx = 0;
+
+ // Collect all the dominance properties info, before invalidating
+ // the underlying DT.
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // Update dominator information.
+ MachineBasicBlock *Succ = Edge.ToBB;
+ MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+ for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+ if (PredBB == Edge.NewBB)
+ continue;
+ // If we are in this situation:
+ // FromBB1 FromBB2
+ // + +
+ // + + + +
+ // + + + +
+ // ... Split1 Split2 ...
+ // + +
+ // + +
+ // +
+ // Succ
+      // Instead of checking the dominance property with Split2, we check it
+      // with FromBB2, since Split2 is not yet known to the underlying DT
+      // structure.
+ if (NewBBs.count(PredBB)) {
+ assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+ "critical edge split has more "
+ "than one predecessor!");
+ PredBB = *PredBB->pred_begin();
+ }
+ if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+ IsNewIDom[Idx] = false;
+ break;
+ }
+ }
+ ++Idx;
+ }
+
+ // Now, update DT with the collected dominance properties info.
+ Idx = 0;
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // We know FromBB dominates NewBB.
+ MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx])
+ DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
+ ++Idx;
+ }
+ NewBBs.clear();
+ CriticalEdgesToSplit.clear();
+}
+
+void MachineDominatorTree::verifyDomTree() const {
+ MachineFunction &F = *getRoot()->getParent();
+
+ MachineDominatorTree OtherDT;
+ OtherDT.DT->recalculate(F);
+ if (compare(OtherDT)) {
+ errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
+ print(errs(), nullptr);
+ errs() << "\nActual:\n";
+ OtherDT.print(errs(), nullptr);
+ abort();
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
new file mode 100644
index 000000000000..a7c63ef4c852
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -0,0 +1,1002 @@
+//===-- MachineFunction.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code information for a function. This allows
+// target-specific information about the generated code to be stored with each
+// function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "codegen"
+
+static cl::opt<unsigned>
+ AlignAllFunctions("align-all-functions",
+ cl::desc("Force the alignment of all functions."),
+ cl::init(0), cl::Hidden);
+
+void MachineFunctionInitializer::anchor() {}
+
+void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const {
+ // Leave this function even in NDEBUG as an out-of-line anchor.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ for (BitVector::size_type i = 0; i < Properties.size(); ++i) {
+ bool HasProperty = Properties[i];
+ if (OnlySet && !HasProperty)
+ continue;
+ switch(static_cast<Property>(i)) {
+ case Property::IsSSA:
+ ROS << (HasProperty ? "SSA, " : "Post SSA, ");
+ break;
+ case Property::TracksLiveness:
+ ROS << (HasProperty ? "" : "not ") << "tracking liveness, ";
+ break;
+ case Property::AllVRegsAllocated:
+ ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs");
+ break;
+ default:
+ break;
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out-of-line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+ MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
+ const Function *Fn) {
+ if (Fn->hasFnAttribute(Attribute::StackAlignment))
+ return Fn->getFnStackAlignment();
+ return STI->getFrameLowering()->getStackAlignment();
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+ unsigned FunctionNum, MachineModuleInfo &mmi)
+ : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
+ MMI(mmi) {
+ // Assume the function starts in SSA form with correct liveness.
+ Properties.set(MachineFunctionProperties::Property::IsSSA);
+ Properties.set(MachineFunctionProperties::Property::TracksLiveness);
+ if (STI->getRegisterInfo())
+ RegInfo = new (Allocator) MachineRegisterInfo(this);
+ else
+ RegInfo = nullptr;
+
+ MFInfo = nullptr;
+ // We can realign the stack if the target supports it and the user hasn't
+ // explicitly asked us not to.
+ bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
+ !F->hasFnAttribute("no-realign-stack");
+ FrameInfo = new (Allocator) MachineFrameInfo(
+ getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP,
+ /*ForceRealign=*/CanRealignSP &&
+ F->hasFnAttribute(Attribute::StackAlignment));
+
+ if (Fn->hasFnAttribute(Attribute::StackAlignment))
+ FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
+
+ ConstantPool = new (Allocator) MachineConstantPool(getDataLayout());
+ Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
+
+ // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn.
+ // FIXME: Use Function::optForSize().
+ if (!Fn->hasFnAttribute(Attribute::OptimizeForSize))
+ Alignment = std::max(Alignment,
+ STI->getTargetLowering()->getPrefFunctionAlignment());
+
+ if (AlignAllFunctions)
+ Alignment = AlignAllFunctions;
+
+ FunctionNumber = FunctionNum;
+ JumpTableInfo = nullptr;
+
+ if (isFuncletEHPersonality(classifyEHPersonality(
+ F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) {
+ WinEHInfo = new (Allocator) WinEHFuncInfo();
+ }
+
+ assert(TM.isCompatibleDataLayout(getDataLayout()) &&
+ "Can't create a MachineFunction using a Module with a "
+ "Target-incompatible DataLayout attached\n");
+
+ PSVManager = llvm::make_unique<PseudoSourceValueManager>();
+}
+
+MachineFunction::~MachineFunction() {
+ // Don't call destructors on MachineInstr and MachineOperand. All of their
+ // memory comes from the BumpPtrAllocator which is about to be purged.
+ //
+  // Do call MachineBasicBlock destructors; they contain std::vectors.
+ for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I))
+ I->Insts.clearAndLeakNodesUnsafely();
+
+ InstructionRecycler.clear(Allocator);
+ OperandRecycler.clear(Allocator);
+ BasicBlockRecycler.clear(Allocator);
+ if (RegInfo) {
+ RegInfo->~MachineRegisterInfo();
+ Allocator.Deallocate(RegInfo);
+ }
+ if (MFInfo) {
+ MFInfo->~MachineFunctionInfo();
+ Allocator.Deallocate(MFInfo);
+ }
+
+ FrameInfo->~MachineFrameInfo();
+ Allocator.Deallocate(FrameInfo);
+
+ ConstantPool->~MachineConstantPool();
+ Allocator.Deallocate(ConstantPool);
+
+ if (JumpTableInfo) {
+ JumpTableInfo->~MachineJumpTableInfo();
+ Allocator.Deallocate(JumpTableInfo);
+ }
+
+ if (WinEHInfo) {
+ WinEHInfo->~WinEHFuncInfo();
+ Allocator.Deallocate(WinEHInfo);
+ }
+}
+
+const DataLayout &MachineFunction::getDataLayout() const {
+ return Fn->getParent()->getDataLayout();
+}
+
+/// Get the JumpTableInfo for this function.
+/// If it does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+ if (JumpTableInfo) return JumpTableInfo;
+
+ JumpTableInfo = new (Allocator)
+ MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+ return JumpTableInfo;
+}
+
+/// Should we emit segmented stack support for the function?
+bool MachineFunction::shouldSplitStack() const {
+ return getFunction()->hasFnAttribute("split-stack");
+}
+
+/// This discards all of the MachineBasicBlock numbers and recomputes them.
+/// This guarantees that the MBB numbers are sequential, dense, and match the
+/// ordering of the blocks within the function. If a specific MachineBasicBlock
+/// is specified, only that block and those after it are renumbered.
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+ if (empty()) { MBBNumbering.clear(); return; }
+ MachineFunction::iterator MBBI, E = end();
+ if (MBB == nullptr)
+ MBBI = begin();
+ else
+ MBBI = MBB->getIterator();
+
+ // Figure out the block number this should have.
+ unsigned BlockNo = 0;
+ if (MBBI != begin())
+ BlockNo = std::prev(MBBI)->getNumber() + 1;
+
+ for (; MBBI != E; ++MBBI, ++BlockNo) {
+ if (MBBI->getNumber() != (int)BlockNo) {
+ // Remove use of the old number.
+ if (MBBI->getNumber() != -1) {
+ assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+ "MBB number mismatch!");
+ MBBNumbering[MBBI->getNumber()] = nullptr;
+ }
+
+ // If BlockNo is already taken, set that block's number to -1.
+ if (MBBNumbering[BlockNo])
+ MBBNumbering[BlockNo]->setNumber(-1);
+
+ MBBNumbering[BlockNo] = &*MBBI;
+ MBBI->setNumber(BlockNo);
+ }
+ }
+
+ // Okay, all the blocks are renumbered. If we have compactified the block
+ // numbering, shrink MBBNumbering now.
+ assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+ MBBNumbering.resize(BlockNo);
+}
+
+/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
+MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
+ const DebugLoc &DL,
+ bool NoImp) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, MCID, DL, NoImp);
+}
+
+/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
+/// identical in all ways except the instruction has no parent, prev, or next.
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+ return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+ MachineInstr(*this, *Orig);
+}
+
+/// Delete the given MachineInstr.
+///
+/// This function also serves as the MachineInstr destructor - the real
+/// ~MachineInstr() destructor must be empty.
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ // Strip it for parts. The operand array and the MI object itself are
+ // independently recyclable.
+ if (MI->Operands)
+ deallocateOperandArray(MI->CapOperands, MI->Operands);
+ // Don't call ~MachineInstr() which must be trivial anyway because
+  // ~MachineFunction drops whole lists of MachineInstrs without calling their
+ // destructors.
+ InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// Allocate a new MachineBasicBlock. Use this instead of
+/// `new MachineBasicBlock'.
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+ return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+ MachineBasicBlock(*this, bb);
+}
+
+/// Delete the given MachineBasicBlock.
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+ assert(MBB->getParent() == this && "MBB parent mismatch!");
+ MBB->~MachineBasicBlock();
+ BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
+ unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ return new (Allocator)
+ MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ int64_t Offset, uint64_t Size) {
+ if (MMO->getValue())
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment());
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment());
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+ return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+ // Count the number of load mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isLoad())
+ ++Num;
+
+ // Allocate a new array and populate it with the load information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isLoad()) {
+ if (!(*I)->isStore())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the store flag.
+ MachineMemOperand *JustLoad =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOStore,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getAAInfo());
+ Result[Index] = JustLoad;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+ MachineInstr::mmo_iterator End) {
+  // Count the number of store mem refs.
+ unsigned Num = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+ if ((*I)->isStore())
+ ++Num;
+
+ // Allocate a new array and populate it with the store information.
+ MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+ unsigned Index = 0;
+ for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+ if ((*I)->isStore()) {
+ if (!(*I)->isLoad())
+ // Reuse the MMO.
+ Result[Index] = *I;
+ else {
+ // Clone the MMO and unset the load flag.
+ MachineMemOperand *JustStore =
+ getMachineMemOperand((*I)->getPointerInfo(),
+ (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getAAInfo());
+ Result[Index] = JustStore;
+ }
+ ++Index;
+ }
+ }
+ return std::make_pair(Result, Result + Num);
+}
+
+const char *MachineFunction::createExternalSymbolName(StringRef Name) {
+ char *Dest = Allocator.Allocate<char>(Name.size() + 1);
+ std::copy(Name.begin(), Name.end(), Dest);
+ Dest[Name.size()] = 0;
+ return Dest;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineFunction::dump() const {
+ print(dbgs());
+}
+#endif
+
+StringRef MachineFunction::getName() const {
+ assert(getFunction() && "No function!");
+ return getFunction()->getName();
+}
+
+void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
+ OS << "# Machine code for function " << getName() << ": ";
+ OS << "Properties: <";
+ getProperties().print(OS);
+ OS << ">\n";
+
+ // Print Frame Information
+ FrameInfo->print(*this, OS);
+
+ // Print JumpTable Information
+ if (JumpTableInfo)
+ JumpTableInfo->print(OS);
+
+ // Print Constant Pool
+ ConstantPool->print(OS);
+
+ const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo();
+
+ if (RegInfo && !RegInfo->livein_empty()) {
+ OS << "Function Live Ins: ";
+ for (MachineRegisterInfo::livein_iterator
+ I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+ OS << PrintReg(I->first, TRI);
+ if (I->second)
+ OS << " in " << PrintReg(I->second, TRI);
+ if (std::next(I) != E)
+ OS << ", ";
+ }
+ OS << '\n';
+ }
+
+ ModuleSlotTracker MST(getFunction()->getParent());
+ MST.incorporateFunction(*getFunction());
+ for (const auto &BB : *this) {
+ OS << '\n';
+ BB.print(OS, MST, Indexes);
+ }
+
+ OS << "\n# End machine code for function " << getName() << ".\n\n";
+}
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const MachineFunction *F) {
+ return ("CFG for '" + F->getName() + "' function").str();
+ }
+
+ std::string getNodeLabel(const MachineBasicBlock *Node,
+ const MachineFunction *Graph) {
+ std::string OutStr;
+ {
+ raw_string_ostream OSS(OutStr);
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
+ Node->print(OSS);
+ }
+
+ if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+ // Process string output to make it nicer...
+ for (unsigned i = 0; i != OutStr.length(); ++i)
+ if (OutStr[i] == '\n') { // Left justify
+ OutStr[i] = '\\';
+ OutStr.insert(OutStr.begin()+i+1, 'l');
+ }
+ return OutStr;
+ }
+ };
+}
+
+void MachineFunction::viewCFG() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName());
+#else
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+void MachineFunction::viewCFGOnly() const
+{
+#ifndef NDEBUG
+ ViewGraph(this, "mf" + getName(), true);
+#else
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Add the specified physical register as a live-in value and
+/// create a corresponding virtual register for it.
+unsigned MachineFunction::addLiveIn(unsigned PReg,
+ const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = getRegInfo();
+ unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg);
+ (void)VRegRC;
+ // A physical register can be added several times.
+ // Between two calls, the register class of the related virtual register
+ // may have been constrained to match some operation constraints.
+ // In that case, check that the current register class includes the
+ // physical register and is a sub class of the specified RC.
+ assert((VRegRC == RC || (VRegRC->contains(PReg) &&
+ RC->hasSubClassEq(VRegRC))) &&
+ "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
+ return VReg;
+}
+
+/// Return the MCSymbol for the specified non-empty jump table.
+/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+/// normal 'L' label is returned.
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+ bool isLinkerPrivate) const {
+ const DataLayout &DL = getDataLayout();
+ assert(JumpTableInfo && "No jump tables");
+ assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
+
+ const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
+ : DL.getPrivateGlobalPrefix();
+ SmallString<60> Name;
+ raw_svector_ostream(Name)
+ << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
+ return Ctx.getOrCreateSymbol(Name);
+}
+
+/// Return a function-local symbol to represent the PIC base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const DataLayout &DL = getDataLayout();
+ return Ctx.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ Twine(getFunctionNumber()) + "$pb");
+}
+
+//===----------------------------------------------------------------------===//
+// MachineFrameInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// Make sure the stack frame is at least Align bytes aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+ if (!StackRealignable)
+ assert(Align <= StackAlignment &&
+ "For targets without stack realignment, Align is out of limit!");
+ if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+ unsigned StackAlign) {
+ if (!ShouldClamp || Align <= StackAlign)
+ return Align;
+ DEBUG(dbgs() << "Warning: requested alignment " << Align
+ << " exceeds the stack alignment " << StackAlign
+ << " when stack realignment is off" << '\n');
+ return StackAlign;
+}
+
+/// Create a new statically sized stack object, returning a nonnegative
+/// identifier to represent it.
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+ bool isSS, const AllocaInst *Alloca) {
+ assert(Size != 0 && "Cannot allocate zero size stack objects!");
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca,
+ !isSS));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ assert(Index >= 0 && "Bad frame index!");
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// Create a new statically sized stack object that represents a spill slot,
+/// returning a nonnegative identifier to represent it.
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+ unsigned Alignment) {
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ CreateStackObject(Size, Alignment, true);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
+ ensureMaxAlignment(Alignment);
+ return Index;
+}
+
+/// Notify the MachineFrameInfo object that a variable sized object has been
+/// created. This must be created whenever a variable sized object is created,
+/// whether or not the index returned is actually used.
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
+ const AllocaInst *Alloca) {
+ HasVarSizedObjects = true;
+ Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true));
+ ensureMaxAlignment(Alignment);
+ return (int)Objects.size()-NumFixedObjects-1;
+}
+
+/// Create a new object at a fixed location on the stack.
+/// All fixed objects should be created before other objects are created for
+/// efficiency. By default, fixed objects are immutable. This returns an
+/// index with a negative value.
+int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable, bool isAliased) {
+ assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned. Note that unlike the non-fixed case, if the
+ // stack needs realignment, we can't assume that the stack will in fact be
+ // aligned.
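+  // For example (illustrative): with a 16-byte aligned incoming stack, an
+  // object at SPOffset 32 gets MinAlign(32, 16) = 16, while an object at
+  // SPOffset 36 only gets MinAlign(36, 16) = 4.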
+ unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/ false,
+ /*Alloca*/ nullptr, isAliased));
+ return -++NumFixedObjects;
+}
+
+/// Create a spill slot at a fixed location on the stack.
+/// Returns an index with a negative value.
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+ int64_t SPOffset) {
+ unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
+ /*Immutable*/ true,
+ /*isSS*/ true,
+ /*Alloca*/ nullptr,
+ /*isAliased*/ false));
+ return -++NumFixedObjects;
+}
+
+BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ BitVector BV(TRI->getNumRegs());
+
+ // Before CSI is calculated, no registers are considered pristine. They can be
+ // freely used and PEI will make sure they are saved.
+ if (!isCalleeSavedInfoValid())
+ return BV;
+
+ for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ BV.set(*CSR);
+
+ // Saved CSRs are not pristine.
+ for (auto &I : getCalleeSavedInfo())
+ for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
+ BV.reset(*S);
+
+ return BV;
+}
+
+unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ unsigned MaxAlign = getMaxAlignment();
+ int Offset = 0;
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -getObjectOffset(i);
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
+ if (isDeadObjectIndex(i))
+ continue;
+ Offset += getObjectSize(i);
+ unsigned Align = getObjectAlignment(i);
+ // Adjust to alignment boundary
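+    // (e.g. an Offset of 20 with an 8-byte aligned object rounds up to 24).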
+ Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ if (adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (adjustsStack() || hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
+ return (unsigned)Offset;
+}
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+ if (Objects.empty()) return;
+
+ const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
+ int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+ OS << "Frame Objects:\n";
+
+ for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+ const StackObject &SO = Objects[i];
+ OS << " fi#" << (int)(i-NumFixedObjects) << ": ";
+ if (SO.Size == ~0ULL) {
+ OS << "dead\n";
+ continue;
+ }
+ if (SO.Size == 0)
+ OS << "variable sized";
+ else
+ OS << "size=" << SO.Size;
+ OS << ", align=" << SO.Alignment;
+
+ if (i < NumFixedObjects)
+ OS << ", fixed";
+ if (i < NumFixedObjects || SO.SPOffset != -1) {
+ int64_t Off = SO.SPOffset - ValOffset;
+ OS << ", at location [SP";
+ if (Off > 0)
+ OS << "+" << Off;
+ else if (Off < 0)
+ OS << Off;
+ OS << "]";
+ }
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+ print(MF, dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
+ // The size of a jump table entry is 4 bytes unless the entry is just the
+ // address of a block, in which case it is the pointer size.
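+  // For instance, on a typical 64-bit target an EK_BlockAddress entry is
+  // 8 bytes wide, while an EK_LabelDifference32 entry stays at 4 bytes.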
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return 4;
+ case MachineJumpTableInfo::EK_Inline:
+ return 0;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
+ // The alignment of a jump table entry is the alignment of int32 unless the
+ // entry is just the address of a block, in which case it is the pointer
+ // alignment.
+ switch (getEntryKind()) {
+ case MachineJumpTableInfo::EK_BlockAddress:
+ return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64);
+ case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+ case MachineJumpTableInfo::EK_LabelDifference32:
+ case MachineJumpTableInfo::EK_Custom32:
+ return TD.getABIIntegerTypeAlignment(32);
+ case MachineJumpTableInfo::EK_Inline:
+ return 1;
+ }
+ llvm_unreachable("Unknown jump table encoding!");
+}
+
+/// Create a new jump table entry in the jump table info.
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+ const std::vector<MachineBasicBlock*> &DestBBs) {
+ assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+ JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+ return JumpTables.size()-1;
+}
+
+/// If Old is the target of any jump tables, update the jump tables to branch
+/// to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+  for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+    MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// If Old is a target of the jump tables, update the jump table to branch to
+/// New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+ if (JumpTables.empty()) return;
+
+ OS << "Jump Tables:\n";
+
+ for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+ OS << " jt#" << i << ": ";
+ for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+ OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+ }
+
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); }
+#endif
+
+
+//===----------------------------------------------------------------------===//
+// MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+void MachineConstantPoolValue::anchor() { }
+
+Type *MachineConstantPoolEntry::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool MachineConstantPoolEntry::needsRelocation() const {
+ if (isMachineConstantPoolEntry())
+ return true;
+ return Val.ConstVal->needsRelocation();
+}
+
+SectionKind
+MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
+ if (needsRelocation())
+ return SectionKind::getReadOnlyWithRel();
+ switch (DL->getTypeAllocSize(getType())) {
+ case 4:
+ return SectionKind::getMergeableConst4();
+ case 8:
+ return SectionKind::getMergeableConst8();
+ case 16:
+ return SectionKind::getMergeableConst16();
+ case 32:
+ return SectionKind::getMergeableConst32();
+ default:
+ return SectionKind::getReadOnly();
+ }
+}
+
+MachineConstantPool::~MachineConstantPool() {
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (Constants[i].isMachineConstantPoolEntry())
+ delete Constants[i].Val.MachineCPVal;
+ for (DenseSet<MachineConstantPoolValue*>::iterator I =
+ MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+ I != E; ++I)
+ delete *I;
+}
+
+/// Test whether the given two constants can be allocated the same constant pool
+/// entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+ const DataLayout &DL) {
+ // Handle the trivial case quickly.
+ if (A == B) return true;
+
+ // If they have the same type but weren't the same constant, quickly
+ // reject them.
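+  // (Constants are uniqued by the LLVM context, so two distinct Constant
+  // pointers of the same type hold different values; sharing is only
+  // interesting across types whose bit patterns may coincide.)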
+ if (A->getType() == B->getType()) return false;
+
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
+ // For now, only support constants with the same size.
+ uint64_t StoreSize = DL.getTypeStoreSize(A->getType());
+ if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128)
+ return false;
+
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both instructions to an integer. If we
+ // get two identical ConstantInt's, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // DataLayout.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldCastOperand(Instruction::PtrToInt,
+ const_cast<Constant *>(A), IntTy, DL);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(A),
+ IntTy, DL);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldCastOperand(Instruction::PtrToInt,
+ const_cast<Constant *>(B), IntTy, DL);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(B),
+ IntTy, DL);
+
+ return A == B;
+}
+
+/// Create a new entry in the constant pool or return an existing one.
+/// User must specify the minimum required alignment (in bytes) for the object.
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+ if (!Constants[i].isMachineConstantPoolEntry() &&
+ CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) {
+ if ((unsigned)Constants[i].getAlignment() < Alignment)
+ Constants[i].Alignment = Alignment;
+ return i;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+ return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+ unsigned Alignment) {
+ assert(Alignment && "Alignment must be specified!");
+ if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+ // Check to see if we already have this constant.
+ //
+ // FIXME, this could be made much more efficient for large constant pools.
+ int Idx = V->getExistingMachineCPValue(this, Alignment);
+ if (Idx != -1) {
+ MachineCPVsSharingEntries.insert(V);
+ return (unsigned)Idx;
+ }
+
+ Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+ return Constants.size()-1;
+}
+
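+// Illustrative helper (not from the original file): how a backend might place
+// an IR constant into the pool. The alignment argument is in bytes; here it is
+// taken from the DataLayout's preferred alignment for the constant's type.
+static unsigned placeConstantInPool(MachineConstantPool &MCP, const Constant *C,
+                                    const DataLayout &DL) {
+  unsigned ByteAlign = DL.getPrefTypeAlignment(C->getType());
+  return MCP.getConstantPoolIndex(C, ByteAlign);
+}
+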
+void MachineConstantPool::print(raw_ostream &OS) const {
+ if (Constants.empty()) return;
+
+ OS << "Constant Pool:\n";
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ OS << " cp#" << i << ": ";
+ if (Constants[i].isMachineConstantPoolEntry())
+ Constants[i].Val.MachineCPVal->print(OS);
+ else
+ Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false);
+ OS << ", align=" << Constants[i].getAlignment();
+ OS << "\n";
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineConstantPool::dump() const { print(dbgs()); }
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 000000000000..338cd1e22032
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,60 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
+using namespace llvm;
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(
+ const TargetMachine &tm, MachineFunctionInitializer *MFInitializer)
+ : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+ releaseMemory();
+ assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ assert(MMI && "MMI not around yet??");
+ MMI->setModule(&M);
+ NextFnNum = 0;
+ return false;
+}
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+ assert(!MF && "MachineFunctionAnalysis already initialized!");
+ MF = new MachineFunction(&F, TM, NextFnNum++,
+ getAnalysis<MachineModuleInfo>());
+ if (MFInitializer)
+ MFInitializer->initializeMachineFunction(*MF);
+ return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+ delete MF;
+ MF = nullptr;
+}
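+
+// Lifecycle note (a sketch of intended use, not a comment from the original
+// file): the MachineFunction built in runOnFunction is handed to every
+// downstream MachineFunctionPass for the same IR function via getMF(), and is
+// freed in releaseMemory() once the pass manager no longer needs this analysis.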
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 000000000000..228fe170ab46
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,89 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+ const std::string &Banner) const {
+ return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ return false;
+
+ MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ MachineFunctionProperties &MFProps = MF.getProperties();
+
+#ifndef NDEBUG
+ if (!MFProps.verifyRequiredProperties(RequiredProperties)) {
+ errs() << "MachineFunctionProperties required by " << getPassName()
+ << " pass are not met by function " << F.getName() << ".\n"
+ << "Required properties: ";
+ RequiredProperties.print(errs(), /*OnlySet=*/true);
+ errs() << "\nCurrent properties: ";
+ MFProps.print(errs());
+ errs() << "\n";
+ llvm_unreachable("MachineFunctionProperties check failed");
+ }
+#endif
+
+ bool RV = runOnMachineFunction(MF);
+
+ MFProps.set(SetProperties);
+ MFProps.clear(ClearedProperties);
+ return RV;
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+
+ // MachineFunctionPass preserves all LLVM IR passes, but there's no
+ // high-level way to express this. Instead, just list a bunch of
+ // passes explicitly. This does not include setPreservesCFG,
+ // because CodeGen overloads that to mean preserving the MachineBasicBlock
+ // CFG in addition to the LLVM IR CFG.
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<DominanceFrontierWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<IVUsersWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<SCEVAAWrapperPass>();
+ AU.addPreserved<StackProtector>();
+
+ FunctionPass::getAnalysisUsage(AU);
+}
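+
+// Minimal sketch of a client pass built on the plumbing above (illustrative;
+// the pass name and behavior are hypothetical). runOnFunction above fetches the
+// MachineFunction from MachineFunctionAnalysis and dispatches to
+// runOnMachineFunction implementations like this one.
+namespace {
+struct CountMachineInstrs : public MachineFunctionPass {
+  static char ID;
+  CountMachineInstrs() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    unsigned NumInstrs = 0;
+    for (const MachineBasicBlock &MBB : MF)
+      NumInstrs += MBB.size();
+    (void)NumInstrs; // a real pass would report or act on the count
+    return false;    // nothing was modified
+  }
+};
+char CountMachineInstrs::ID = 0;
+} // end anonymous namespace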
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 000000000000..4f424ff292cc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,69 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+ static char ID;
+
+ raw_ostream &OS;
+ const std::string Banner;
+
+ MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+ const char *getPassName() const override { return "MachineFunction Printer"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!llvm::isFunctionInPrintList(MF.getName()))
+ return false;
+ OS << "# " << Banner << ":\n";
+ MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
+ return false;
+ }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer",
+ "Machine Function Printer", false, false)
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+ const std::string &Banner){
+ return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
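+
+// Usage sketch (illustrative; assumes llvm/IR/LegacyPassManager.h is available
+// for legacy::PassManagerBase): dump every machine function with a banner.
+static void addMIRPrinter(legacy::PassManagerBase &PM) {
+  PM.add(createMachineFunctionPrinterPass(errs(), "After instruction selection"));
+}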
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 000000000000..3cdf8d2941d3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,2236 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintWholeRegMask(
+ "print-whole-regmask",
+ cl::desc("Print the full contents of regmask operands in IR dumps"),
+ cl::init(true), cl::Hidden);
+
+//===----------------------------------------------------------------------===//
+// MachineOperand Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineOperand::setReg(unsigned Reg) {
+ if (getReg() == Reg) return; // No change.
+
+ // Otherwise, we have to change the register. If this operand is embedded
+ // into a machine function, we need to update the old and new register's
+ // use/def lists.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ SmallContents.RegNo = Reg;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+
+ // Otherwise, just change the register, no problem. :)
+ SmallContents.RegNo = Reg;
+}
+
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ // Note that getSubReg() may return 0 if the sub-register doesn't exist.
+ // That won't happen in legal code.
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
+
+/// Change a def to a use, or a use to a def.
+void MachineOperand::setIsDef(bool Val) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ assert((!Val || !isDebug()) && "Marking a debug operation as def");
+ if (IsDef == Val)
+ return;
+ // MRI may keep uses and defs in different list positions.
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent()) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MRI.removeRegOperandFromUseList(this);
+ IsDef = Val;
+ MRI.addRegOperandToUseList(this);
+ return;
+ }
+ IsDef = Val;
+}
+
+// If this operand is currently a register operand, and if this is in a
+// function, deregister the operand from the register's use/def list.
+void MachineOperand::removeRegFromUses() {
+ if (!isReg() || !isOnRegUseList())
+ return;
+
+ if (MachineInstr *MI = getParent()) {
+ if (MachineBasicBlock *MBB = MI->getParent()) {
+ if (MachineFunction *MF = MBB->getParent())
+ MF->getRegInfo().removeRegOperandFromUseList(this);
+ }
+ }
+}
+
+/// ChangeToImmediate - Replace this operand with a new immediate operand of
+/// the specified value. If an operand is known to be an immediate already,
+/// the setImm method should be used.
+void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_Immediate;
+ Contents.ImmVal = ImmVal;
+}
+
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+ assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
+
+ removeRegFromUses();
+
+ OpKind = MO_FPImmediate;
+ Contents.CFP = FPImm;
+}
+
+void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an external symbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_ExternalSymbol;
+ Contents.OffsetedInfo.Val.SymbolName = SymName;
+ setOffset(0); // Offset is always 0.
+ setTargetFlags(TargetFlags);
+}
+
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into an MCSymbol");
+
+ removeRegFromUses();
+
+ OpKind = MO_MCSymbol;
+ Contents.Sym = Sym;
+}
+
+/// ChangeToRegister - Replace this operand with a new register operand of
+/// the specified value. If an operand is known to be a register already,
+/// the setReg method should be used.
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+ bool isKill, bool isDead, bool isUndef,
+ bool isDebug) {
+ MachineRegisterInfo *RegInfo = nullptr;
+ if (MachineInstr *MI = getParent())
+ if (MachineBasicBlock *MBB = MI->getParent())
+ if (MachineFunction *MF = MBB->getParent())
+ RegInfo = &MF->getRegInfo();
+ // If this operand is already a register operand, remove it from the
+ // register's use/def lists.
+ bool WasReg = isReg();
+ if (RegInfo && WasReg)
+ RegInfo->removeRegOperandFromUseList(this);
+
+ // Change this to a register and set the reg#.
+ OpKind = MO_Register;
+ SmallContents.RegNo = Reg;
+ SubReg_TargetFlags = 0;
+ IsDef = isDef;
+ IsImp = isImp;
+ IsKill = isKill;
+ IsDead = isDead;
+ IsUndef = isUndef;
+ IsInternalRead = false;
+ IsEarlyClobber = false;
+ IsDebug = isDebug;
+ // Ensure isOnRegUseList() returns false.
+ Contents.Reg.Prev = nullptr;
+ // Preserve the tie when the operand was already a register.
+ if (!WasReg)
+ TiedTo = 0;
+
+ // If this operand is embedded in a function, add the operand to the
+ // register's use/def list.
+ if (RegInfo)
+ RegInfo->addRegOperandToUseList(this);
+}
+
+/// isIdenticalTo - Return true if this operand is identical to the specified
+/// operand. Note that this should stay in sync with the hash_value overload
+/// below.
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
+ if (getType() != Other.getType() ||
+ getTargetFlags() != Other.getTargetFlags())
+ return false;
+
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ return getReg() == Other.getReg() && isDef() == Other.isDef() &&
+ getSubReg() == Other.getSubReg();
+ case MachineOperand::MO_Immediate:
+ return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
+ case MachineOperand::MO_FPImmediate:
+ return getFPImm() == Other.getFPImm();
+ case MachineOperand::MO_MachineBasicBlock:
+ return getMBB() == Other.getMBB();
+ case MachineOperand::MO_FrameIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return getIndex() == Other.getIndex() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_JumpTableIndex:
+ return getIndex() == Other.getIndex();
+ case MachineOperand::MO_GlobalAddress:
+ return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset();
+ case MachineOperand::MO_ExternalSymbol:
+ return !strcmp(getSymbolName(), Other.getSymbolName()) &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_BlockAddress:
+ return getBlockAddress() == Other.getBlockAddress() &&
+ getOffset() == Other.getOffset();
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_MCSymbol:
+ return getMCSymbol() == Other.getMCSymbol();
+ case MachineOperand::MO_CFIIndex:
+ return getCFIIndex() == Other.getCFIIndex();
+ case MachineOperand::MO_Metadata:
+ return getMetadata() == Other.getMetadata();
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+// Note: this must stay exactly in sync with isIdenticalTo above.
+hash_code llvm::hash_value(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ // Register operands don't have target flags.
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm());
+ case MachineOperand::MO_MachineBasicBlock:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB());
+ case MachineOperand::MO_FrameIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(),
+ MO.getOffset());
+ case MachineOperand::MO_JumpTableIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ MO.getSymbolName());
+ case MachineOperand::MO_GlobalAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
+ MO.getOffset());
+ case MachineOperand::MO_BlockAddress:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getBlockAddress(), MO.getOffset());
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+ case MachineOperand::MO_Metadata:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
+ case MachineOperand::MO_MCSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+ case MachineOperand::MO_CFIIndex:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+void MachineOperand::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ ModuleSlotTracker DummyMST(nullptr);
+ print(OS, DummyMST, TRI);
+}
+
+void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ const TargetRegisterInfo *TRI) const {
+ switch (getType()) {
+ case MachineOperand::MO_Register:
+ OS << PrintReg(getReg(), TRI, getSubReg());
+
+ if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+ isInternalRead() || isEarlyClobber() || isTied()) {
+ OS << '<';
+ bool NeedComma = false;
+ if (isDef()) {
+ if (NeedComma) OS << ',';
+ if (isEarlyClobber())
+ OS << "earlyclobber,";
+ if (isImplicit())
+ OS << "imp-";
+ OS << "def";
+ NeedComma = true;
+ // <def,read-undef> only makes sense when getSubReg() is set.
+ // Don't clutter the output otherwise.
+ if (isUndef() && getSubReg())
+ OS << ",read-undef";
+ } else if (isImplicit()) {
+ OS << "imp-use";
+ NeedComma = true;
+ }
+
+ if (isKill()) {
+ if (NeedComma) OS << ',';
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ if (NeedComma) OS << ',';
+ OS << "dead";
+ NeedComma = true;
+ }
+ if (isUndef() && isUse()) {
+ if (NeedComma) OS << ',';
+ OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
+ }
+ if (isTied()) {
+ if (NeedComma) OS << ',';
+ OS << "tied";
+ if (TiedTo != 15)
+ OS << unsigned(TiedTo - 1);
+ }
+ OS << '>';
+ }
+ break;
+ case MachineOperand::MO_Immediate:
+ OS << getImm();
+ break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
+ case MachineOperand::MO_FPImmediate:
+ if (getFPImm()->getType()->isFloatTy()) {
+ OS << getFPImm()->getValueAPF().convertToFloat();
+ } else if (getFPImm()->getType()->isHalfTy()) {
+ APFloat APF = getFPImm()->getValueAPF();
+ bool Unused;
+ APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused);
+ OS << "half " << APF.convertToFloat();
+ } else {
+ OS << getFPImm()->getValueAPF().convertToDouble();
+ }
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << "<BB#" << getMBB()->getNumber() << ">";
+ break;
+ case MachineOperand::MO_FrameIndex:
+ OS << "<fi#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ OS << "<cp#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_TargetIndex:
+ OS << "<ti#" << getIndex();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ OS << "<jt#" << getIndex() << '>';
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ OS << "<ga:";
+ getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ OS << "<es:" << getSymbolName();
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_BlockAddress:
+ OS << '<';
+ getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST);
+ if (getOffset()) OS << "+" << getOffset();
+ OS << '>';
+ break;
+ case MachineOperand::MO_RegisterMask: {
+ unsigned NumRegsInMask = 0;
+ unsigned NumRegsEmitted = 0;
+ OS << "<regmask";
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ unsigned MaskWord = i / 32;
+ unsigned MaskBit = i % 32;
+ if (getRegMask()[MaskWord] & (1 << MaskBit)) {
+ if (PrintWholeRegMask || NumRegsEmitted <= 10) {
+ OS << " " << PrintReg(i, TRI);
+ NumRegsEmitted++;
+ }
+ NumRegsInMask++;
+ }
+ }
+ if (NumRegsEmitted != NumRegsInMask)
+ OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
+ OS << ">";
+ break;
+ }
+ case MachineOperand::MO_RegisterLiveOut:
+ OS << "<regliveout>";
+ break;
+ case MachineOperand::MO_Metadata:
+ OS << '<';
+ getMetadata()->printAsOperand(OS, MST);
+ OS << '>';
+ break;
+ case MachineOperand::MO_MCSymbol:
+ OS << "<MCSym=" << *getMCSymbol() << '>';
+ break;
+ case MachineOperand::MO_CFIIndex:
+ OS << "<call frame instruction>";
+ break;
+ }
+
+ if (unsigned TF = getTargetFlags())
+ OS << "[TF=" << TF << ']';
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0;
+ return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF,
+ int FI, int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) {
+ return MachinePointerInfo(MF.getPSVManager().getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
+ int64_t Offset) {
+ return MachinePointerInfo(MF.getPSVManager().getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
+ uint64_t s, unsigned int a,
+ const AAMDNodes &AAInfo,
+ const MDNode *Ranges)
+ : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
+ AAInfo(AAInfo), Ranges(Ranges) {
+ assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
+ isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
+ "invalid pointer value");
+ assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+ ID.AddInteger(getOffset());
+ ID.AddInteger(Size);
+ ID.AddPointer(getOpaqueValue());
+ ID.AddInteger(getFlags());
+ ID.AddInteger(getBaseAlignment());
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+ // The Value and Offset may differ due to CSE. But the flags and size
+ // should be the same.
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+ assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+ if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+ // Update the alignment value.
+ BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1;
+ // Also update the base and offset, because the new alignment may
+ // not be applicable with the old ones.
+ PtrInfo = MMO->PtrInfo;
+ }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+ return MinAlign(getBaseAlignment(), getOffset());
+}
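+
+// Worked example for the rule above (illustration only): a base alignment of 8
+// with an offset of 4 only guarantees 4-byte alignment, MinAlign(8, 4) == 4,
+// while an offset of 16 preserves the full 8, MinAlign(8, 16) == 8.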
+
+void MachineMemOperand::print(raw_ostream &OS) const {
+ ModuleSlotTracker DummyMST(nullptr);
+ print(OS, DummyMST);
+}
+
+void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
+ assert((isLoad() || isStore()) &&
+ "SV has to be a load, store or both.");
+
+ if (isVolatile())
+ OS << "Volatile ";
+
+ if (isLoad())
+ OS << "LD";
+ if (isStore())
+ OS << "ST";
+ OS << getSize();
+
+ // Print the address information.
+ OS << "[";
+ if (const Value *V = getValue())
+ V->printAsOperand(OS, /*PrintType=*/false, MST);
+ else if (const PseudoSourceValue *PSV = getPseudoValue())
+ PSV->printCustom(OS);
+ else
+ OS << "<unknown>";
+
+ unsigned AS = getAddrSpace();
+ if (AS != 0)
+ OS << "(addrspace=" << AS << ')';
+
+ // If the alignment of the memory reference itself differs from the alignment
+ // of the base pointer, print the base alignment explicitly, next to the base
+ // pointer.
+ if (getBaseAlignment() != getAlignment())
+ OS << "(align=" << getBaseAlignment() << ")";
+
+ if (getOffset() != 0)
+ OS << "+" << getOffset();
+ OS << "]";
+
+ // Print the alignment of the reference.
+ if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize())
+ OS << "(align=" << getAlignment() << ")";
+
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = getAAInfo().TBAA) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ TBAAInfo->getOperand(0)->printAsOperand(OS, MST);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print AA scope info.
+ if (const MDNode *ScopeInfo = getAAInfo().Scope) {
+ OS << "(alias.scope=";
+ if (ScopeInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) {
+ ScopeInfo->getOperand(i)->printAsOperand(OS, MST);
+ if (i != ie-1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print AA noalias scope info.
+ if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) {
+ OS << "(noalias=";
+ if (NoAliasInfo->getNumOperands() > 0)
+ for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) {
+ NoAliasInfo->getOperand(i)->printAsOperand(OS, MST);
+ if (i != ie-1)
+ OS << ",";
+ }
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
+ // Print nontemporal info.
+ if (isNonTemporal())
+ OS << "(nontemporal)";
+
+ if (isInvariant())
+ OS << "(invariant)";
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
+ if (MCID->ImplicitDefs)
+ for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs;
+ ++ImpDefs)
+ addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (MCID->ImplicitUses)
+ for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses;
+ ++ImpUses)
+ addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified by
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
+ DebugLoc dl, bool NoImp)
+ : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
+ AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
+ debugLoc(std::move(dl))
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ ,
+ Ty(nullptr)
+#endif
+{
+ assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+
+ // Reserve space for the expected number of operands.
+ if (unsigned NumOps = MCID->getNumOperands() +
+ MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
+ CapOperands = OperandCapacity::get(NumOps);
+ Operands = MF.allocateOperandArray(CapOperands);
+ }
+
+ if (!NoImp)
+ addImplicitDefUseOperands(MF);
+}
+
+/// MachineInstr ctor - Copies MachineInstr arg exactly
+///
+MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
+ : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
+ Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs),
+ MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc())
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ ,
+ Ty(nullptr)
+#endif
+{
+ assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
+
+ CapOperands = OperandCapacity::get(MI.getNumOperands());
+ Operands = MF.allocateOperandArray(CapOperands);
+
+ // Copy operands.
+ for (const MachineOperand &MO : MI.operands())
+ addOperand(MF, MO);
+
+ // Copy all the sensible flags.
+ setFlags(MI.Flags);
+}
+
+/// getRegInfo - If this instruction is embedded into a MachineFunction,
+/// return the MachineRegisterInfo object for the current function, otherwise
+/// return null.
+MachineRegisterInfo *MachineInstr::getRegInfo() {
+ if (MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return nullptr;
+}
+
+// Implement dummy setter and getter for type when
+// global-isel is not built.
+// The proper implementation is WIP and is tracked here:
+// PR26576.
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+void MachineInstr::setType(Type *Ty) {}
+
+Type *MachineInstr::getType() const { return nullptr; }
+
+#else
+void MachineInstr::setType(Type *Ty) {
+ assert((!Ty || isPreISelGenericOpcode(getOpcode())) &&
+ "Non generic instructions are not supposed to be typed");
+ this->Ty = Ty;
+}
+
+Type *MachineInstr::getType() const { return Ty; }
+#endif // LLVM_BUILD_GLOBAL_ISEL
+
+/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+/// this instruction from their respective use lists. This requires that the
+/// operands already be on their use lists.
+void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.removeRegOperandFromUseList(&MO);
+}
+
+/// AddRegOperandsToUseLists - Add all of the register operands in
+/// this instruction to their respective use lists. This requires that the
+/// operands not be on their use lists yet.
+void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+ for (MachineOperand &MO : operands())
+ if (MO.isReg())
+ MRI.addRegOperandToUseList(&MO);
+}
+
+void MachineInstr::addOperand(const MachineOperand &Op) {
+ MachineBasicBlock *MBB = getParent();
+ assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs");
+ addOperand(*MF, Op);
+}
+
+/// Move NumOps MachineOperands from Src to Dst, with support for overlapping
+/// ranges. If MRI is non-null also update use-def chains.
+static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
+ unsigned NumOps, MachineRegisterInfo *MRI) {
+ if (MRI)
+ return MRI->moveOperands(Dst, Src, NumOps);
+
+ // MachineOperand is a trivially copyable type so we can just use memmove.
+ std::memmove(Dst, Src, NumOps * sizeof(MachineOperand));
+}
+
+/// addOperand - Add the specified operand to the instruction. If it is an
+/// implicit operand, it is added to the end of the operand list. If it is
+/// an explicit operand it is added at the end of the explicit operand list
+/// (before the first implicit operand).
+void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+ assert(MCID && "Cannot add operands before providing an instr descriptor");
+
+ // Check if we're adding one of our existing operands.
+ if (&Op >= Operands && &Op < Operands + NumOperands) {
+ // This is unusual: MI->addOperand(MI->getOperand(i)).
+ // If adding Op requires reallocating or moving existing operands around,
+ // the Op reference could go stale. Support it by copying Op.
+ MachineOperand CopyOp(Op);
+ return addOperand(MF, CopyOp);
+ }
+
+ // Find the insert location for the new operand. Implicit registers go at
+ // the end, everything else goes before the implicit regs.
+ //
+ // FIXME: Allow mixed explicit and implicit operands on inline asm.
+ // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as
+ // implicit-defs, but they must not be moved around. See the FIXME in
+ // InstrEmitter.cpp.
+ unsigned OpNo = getNumOperands();
+ bool isImpReg = Op.isReg() && Op.isImplicit();
+ if (!isImpReg && !isInlineAsm()) {
+ while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) {
+ --OpNo;
+ assert(!Operands[OpNo].isTied() && "Cannot move tied operands");
+ }
+ }
+
+#ifndef NDEBUG
+ bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+ // OpNo now points at the desired insertion point. Unless this is a variadic
+ // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
+ // RegMask operands go between the explicit and implicit operands.
+ assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
+ OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+ "Trying to add an operand to a machine instr that is already done!");
+#endif
+
+ MachineRegisterInfo *MRI = getRegInfo();
+
+ // Determine if the Operands array needs to be reallocated.
+ // Save the old capacity and operand array.
+ OperandCapacity OldCap = CapOperands;
+ MachineOperand *OldOperands = Operands;
+ if (!OldOperands || OldCap.getSize() == getNumOperands()) {
+ CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1);
+ Operands = MF.allocateOperandArray(CapOperands);
+ // Move the operands before the insertion point.
+ if (OpNo)
+ moveOperands(Operands, OldOperands, OpNo, MRI);
+ }
+
+ // Move the operands following the insertion point.
+ if (OpNo != NumOperands)
+ moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo,
+ MRI);
+ ++NumOperands;
+
+ // Deallocate the old operand array.
+ if (OldOperands != Operands && OldOperands)
+ MF.deallocateOperandArray(OldCap, OldOperands);
+
+ // Copy Op into place. It still needs to be inserted into the MRI use lists.
+ MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op);
+ NewMO->ParentMI = this;
+
+ // When adding a register operand, tell MRI about it.
+ if (NewMO->isReg()) {
+ // Ensure isOnRegUseList() returns false, regardless of Op's status.
+ NewMO->Contents.Reg.Prev = nullptr;
+ // Ignore existing ties. This is not a property that can be copied.
+ NewMO->TiedTo = 0;
+ // Add the new operand to MRI, but only for instructions in an MBB.
+ if (MRI)
+ MRI->addRegOperandToUseList(NewMO);
+ // The MCID operand information isn't accurate until we start adding
+ // explicit operands. The implicit operands are added first, then the
+ // explicits are inserted before them.
+ if (!isImpReg) {
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (NewMO->isUse()) {
+ int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ tieOperands(DefIdx, OpNo);
+ }
+ // If the register operand is flagged as early-clobber, mark the operand as such.
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
+ NewMO->setIsEarlyClobber(true);
+ }
+ }
+}
+
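+// Sketch of the usual client path (illustrative, not from the original file):
+// most code builds operands through MachineInstrBuilder, whose addReg/addImm
+// calls end up in MachineInstr::addOperand above. TargetOpcode::COPY is assumed
+// to be visible through the headers this file already includes.
+static MachineInstr *buildCopySketch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator InsertPt,
+                                     const DebugLoc &DL,
+                                     const TargetInstrInfo &TII,
+                                     unsigned DstReg, unsigned SrcReg) {
+  return BuildMI(MBB, InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg)
+      .addReg(SrcReg, RegState::Kill);
+}
+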
+/// RemoveOperand - Erase an operand from an instruction, leaving it with one
+/// fewer operand than it started with.
+///
+void MachineInstr::RemoveOperand(unsigned OpNo) {
+ assert(OpNo < getNumOperands() && "Invalid operand number");
+ untieRegOperand(OpNo);
+
+#ifndef NDEBUG
+ // Moving tied operands would break the ties.
+ for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i)
+ if (Operands[i].isReg())
+ assert(!Operands[i].isTied() && "Cannot move tied operands");
+#endif
+
+ MachineRegisterInfo *MRI = getRegInfo();
+ if (MRI && Operands[OpNo].isReg())
+ MRI->removeRegOperandFromUseList(Operands + OpNo);
+
+ // Don't call the MachineOperand destructor. A lot of this code depends on
+ // MachineOperand having a trivial destructor anyway, and adding a call here
+ // wouldn't make it 'destructor-correct'.
+
+ if (unsigned N = NumOperands - 1 - OpNo)
+ moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI);
+ --NumOperands;
+}
+
+/// addMemOperand - Add a MachineMemOperand to the machine instruction.
+/// This function should be used only occasionally. The setMemRefs function
+/// is the primary method for setting up a MachineInstr's MemRefs list.
+void MachineInstr::addMemOperand(MachineFunction &MF,
+ MachineMemOperand *MO) {
+ mmo_iterator OldMemRefs = MemRefs;
+ unsigned OldNumMemRefs = NumMemRefs;
+
+ unsigned NewNum = NumMemRefs + 1;
+ mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
+
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
+ NewMemRefs[NewNum - 1] = MO;
+ setMemRefs(NewMemRefs, NewMemRefs + NewNum);
+}
+
+/// Check to see if the MMOs pointed to by the two MemRefs arrays are
+/// identical.
+static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
+ auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
+ auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
+ if ((E1 - I1) != (E2 - I2))
+ return false;
+ for (; I1 != E1; ++I1, ++I2) {
+ if (**I1 != **I2)
+ return false;
+ }
+ return true;
+}
+
+std::pair<MachineInstr::mmo_iterator, unsigned>
+MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
+ // If either of the incoming memref lists is empty, we must be conservative and
+ // treat this as if we've exhausted our space for memrefs and dropped them.
+ if (memoperands_empty() || Other.memoperands_empty())
+ return std::make_pair(nullptr, 0);
+
+ // If both instructions have identical memrefs, we don't need to merge them.
+ // Since many instructions have a single memref, and we tend to merge things
+ // like pairs of loads from the same location, this catches a large number of
+ // cases in practice.
+ if (hasIdenticalMMOs(*this, Other))
+ return std::make_pair(MemRefs, NumMemRefs);
+
+ // TODO: consider uniquing elements within the operand lists to reduce
+ // space usage and fall back to conservative information less often.
+ size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
+
+ // If we don't have enough room to store this many memrefs, be conservative
+ // and drop them. Otherwise, we'd fail asserts when trying to add them to
+ // the new instruction.
+ if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
+ return std::make_pair(nullptr, 0);
+
+ MachineFunction *MF = getParent()->getParent();
+ mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
+ mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(),
+ MemBegin);
+ MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(),
+ MemEnd);
+ assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
+ "missing memrefs");
+
+ return std::make_pair(MemBegin, CombinedNumMemRefs);
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+ assert(!isBundledWithPred() && "Must be called on bundle header");
+ for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle && !MII->isBundle())
+ return false;
+ }
+ // This was the last instruction in the bundle.
+ if (!MII->isBundledWithSucc())
+ return Type == AllInBundle;
+ }
+}
+
+bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
+ MICheckType Check) const {
+ // If opcodes or number of operands are not the same then the two
+ // instructions are obviously not identical.
+ if (Other.getOpcode() != getOpcode() ||
+ Other.getNumOperands() != getNumOperands())
+ return false;
+
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = getIterator();
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
+ MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
+ return false;
+ }
+ }
+
+ // Check operands to make sure they match.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other.getOperand(i);
+ if (!MO.isReg()) {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ continue;
+ }
+
+ // Clients may or may not want to ignore defs when testing for equality.
+ // For example, the machine CSE pass only cares about finding common
+ // subexpressions, so it's safe to ignore virtual register defs.
+ if (MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ else if (Check == IgnoreVRegDefs) {
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isDead() != OMO.isDead())
+ return false;
+ }
+ } else {
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ if (Check == CheckKillDead && MO.isKill() != OMO.isKill())
+ return false;
+ }
+ }
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (getDebugLoc() && Other.getDebugLoc() &&
+ getDebugLoc() != Other.getDebugLoc())
+ return false;
+ return true;
+}
+
+MachineInstr *MachineInstr::removeFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove(this);
+}
+
+MachineInstr *MachineInstr::removeFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ return getParent()->remove_instr(this);
+}
+
+void MachineInstr::eraseFromParent() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase(this);
+}
+
+void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
+ assert(getParent() && "Not embedded in a basic block!");
+ MachineBasicBlock *MBB = getParent();
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Not embedded in a function!");
+
+ MachineInstr *MI = (MachineInstr *)this;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ MRI.markUsesInDebugValueAsUndef(Reg);
+ }
+ MI->eraseFromParent();
+}
+
+void MachineInstr::eraseFromBundle() {
+ assert(getParent() && "Not embedded in a basic block!");
+ getParent()->erase_instr(this);
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
+ return NumOperands;
+
+ for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit())
+ NumOperands++;
+ }
+ return NumOperands;
+}
+
+void MachineInstr::bundleWithPred() {
+ assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+ setFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = getIterator();
+ --Pred;
+ assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+ assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+ setFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = getIterator();
+ ++Succ;
+ assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+ assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+ clearFlag(BundledPred);
+ MachineBasicBlock::instr_iterator Pred = getIterator();
+ --Pred;
+ assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags");
+ Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+ assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+ clearFlag(BundledSucc);
+ MachineBasicBlock::instr_iterator Succ = getIterator();
+ ++Succ;
+ assert(Succ->isBundledWithPred() && "Inconsistent bundle flags");
+ Succ->clearFlag(BundledPred);
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
+
+InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const {
+ assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!");
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0);
+}
+
+int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
+ unsigned *GroupNo) const {
+ assert(isInlineAsm() && "Expected an inline asm instruction");
+ assert(OpIdx < getNumOperands() && "OpIdx out of range");
+
+ // Ignore queries about the initial operands.
+ if (OpIdx < InlineAsm::MIOp_FirstOperand)
+ return -1;
+
+ unsigned Group = 0;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ // If we reach the implicit register operands, stop looking.
+ if (!FlagMO.isImm())
+ return -1;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ if (i + NumOps > OpIdx) {
+ if (GroupNo)
+ *GroupNo = Group;
+ return i;
+ }
+ ++Group;
+ }
+ return -1;
+}
+
+const DILocalVariable *MachineInstr::getDebugVariable() const {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return cast<DILocalVariable>(getOperand(2).getMetadata());
+}
+
+const DIExpression *MachineInstr::getDebugExpression() const {
+ assert(isDebugValue() && "not a DBG_VALUE");
+ return cast<DIExpression>(getOperand(3).getMetadata());
+}
+
+const TargetRegisterClass*
+MachineInstr::getRegClassConstraint(unsigned OpIdx,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) const {
+ assert(getParent() && "Can't have an MBB reference here!");
+ assert(getParent()->getParent() && "Can't have an MF reference here!");
+ const MachineFunction &MF = *getParent()->getParent();
+
+ // Most opcodes have fixed constraints in their MCInstrDesc.
+ if (!isInlineAsm())
+ return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
+
+ if (!getOperand(OpIdx).isReg())
+ return nullptr;
+
+ // For tied uses on inline asm, get the constraint from the def.
+ unsigned DefIdx;
+ if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx))
+ OpIdx = DefIdx;
+
+ // Inline asm stores register class constraints in the flag word.
+ int FlagIdx = findInlineAsmFlagIdx(OpIdx);
+ if (FlagIdx < 0)
+ return nullptr;
+
+ unsigned Flag = getOperand(FlagIdx).getImm();
+ unsigned RCID;
+ if ((InlineAsm::getKind(Flag) == InlineAsm::Kind_RegUse ||
+ InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDef ||
+ InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDefEarlyClobber) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID))
+ return TRI->getRegClass(RCID);
+
+ // Assume that all registers in a memory operand are pointers.
+ if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
+ return TRI->getPointerRegClass(MF);
+
+ return nullptr;
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
+ unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI, bool ExploreBundle) const {
+ // Check every operand inside the bundle if we have
+ // been asked to.
+ if (ExploreBundle)
+ for (ConstMIBundleOperands OpndIt(*this); OpndIt.isValid() && CurRC;
+ ++OpndIt)
+ CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl(
+ OpndIt.getOperandNo(), Reg, CurRC, TII, TRI);
+ else
+ // Otherwise, just check the current operands.
+ for (unsigned i = 0, e = NumOperands; i < e && CurRC; ++i)
+ CurRC = getRegClassConstraintEffectForVRegImpl(i, Reg, CurRC, TII, TRI);
+ return CurRC;
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl(
+ unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
+ assert(CurRC && "Invalid initial register class");
+ // Check if Reg is constrained by some of its use/def from MI.
+ const MachineOperand &MO = getOperand(OpIdx);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ return CurRC;
+ // If yes, accumulate the constraints through the operand.
+ return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI);
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
+ unsigned OpIdx, const TargetRegisterClass *CurRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
+ const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI);
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isReg() &&
+ "Cannot get register constraints for non-register operand");
+ assert(CurRC && "Invalid initial register class");
+ if (unsigned SubIdx = MO.getSubReg()) {
+ if (OpRC)
+ CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx);
+ else
+ CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx);
+ } else if (OpRC)
+ CurRC = TRI->getCommonSubClass(CurRC, OpRC);
+ return CurRC;
+}
+
+/// Return the number of instructions inside the MI bundle, not counting the
+/// header instruction.
+unsigned MachineInstr::getBundleSize() const {
+ MachineBasicBlock::const_instr_iterator I = getIterator();
+ unsigned Size = 0;
+ while (I->isBundledWithSucc()) {
+ ++Size;
+ ++I;
+ }
+ return Size;
+}
+
+/// Returns true if the MachineInstr has an implicit-use operand of exactly
+/// the given register (not considering sub/super-registers).
+bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
+ return true;
+ }
+ return false;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+/// the specified register or -1 if it is not found. It further tightens
+/// the search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+ const TargetRegisterInfo *TRI) const {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MOReg == Reg ||
+ (TRI &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
+ if (!isKill || MO.isKill())
+ return i;
+ }
+ return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg() && !MO.isUndef())
+ // A partial <def,undef> doesn't count as reading the register.
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
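+
+// Reading the returned pair (illustrative examples, not from the original
+// file): a plain use of Reg yields {true, false}; a full def yields
+// {false, true}; a subregister def without <undef>, e.g. %vreg1:sub_lo<def>,
+// yields {true, true}, since the untouched lanes of Reg stay live across the
+// instruction.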
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
+ }
+ return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const {
+ // Don't call MCID.findFirstPredOperandIdx() because this variant
+ // is sometimes called on an instruction that's not yet complete, and
+ // so the number of operands is less than the MCID indicates. In
+ // particular, the PTX target does this.
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
+ return i;
+ }
+
+ return -1;
+}
+
+// MachineOperand::TiedTo is 4 bits wide.
+const unsigned TiedMax = 15;
+
+/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other.
+///
+/// Use and def operands can be tied together, indicated by a non-zero TiedTo
+/// field. TiedTo can have these values:
+///
+/// 0: Operand is not tied to anything.
+/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1).
+/// TiedMax: Tied to an operand >= TiedMax-1.
+///
+/// The tied def must be one of the first TiedMax operands on a normal
+/// instruction. INLINEASM instructions allow more tied defs.
+///
+void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
+ MachineOperand &DefMO = getOperand(DefIdx);
+ MachineOperand &UseMO = getOperand(UseIdx);
+ assert(DefMO.isDef() && "DefIdx must be a def operand");
+ assert(UseMO.isUse() && "UseIdx must be a use operand");
+ assert(!DefMO.isTied() && "Def is already tied to another use");
+ assert(!UseMO.isTied() && "Use is already tied to another def");
+
+ if (DefIdx < TiedMax)
+ UseMO.TiedTo = DefIdx + 1;
+ else {
+ // Inline asm can use the group descriptors to find tied operands, but on
+ // normal instruction, the tied def must be within the first TiedMax
+ // operands.
+ assert(isInlineAsm() && "DefIdx out of range");
+ UseMO.TiedTo = TiedMax;
+ }
+
+ // UseIdx can be out of range, we'll search for it in findTiedOperandIdx().
+ DefMO.TiedTo = std::min(UseIdx + 1, TiedMax);
+}
+
+/// Given the index of a tied register operand, find the operand it is tied to.
+/// Defs are tied to uses and vice versa. Returns the index of the tied operand
+/// which must exist.
+unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
+ const MachineOperand &MO = getOperand(OpIdx);
+ assert(MO.isTied() && "Operand isn't tied");
+
+ // Normally TiedTo is in range.
+ if (MO.TiedTo < TiedMax)
+ return MO.TiedTo - 1;
+
+ // Uses on normal instructions can be out of range.
+ if (!isInlineAsm()) {
+ // Normal tied defs must be in the 0..TiedMax-1 range.
+ if (MO.isUse())
+ return TiedMax - 1;
+ // MO is a def. Search for the tied use.
+ for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &UseMO = getOperand(i);
+ if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1)
+ return i;
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
+ // Now deal with inline asm by parsing the operand group descriptor flags.
+ // Find the beginning of each operand group.
+ SmallVector<unsigned, 8> GroupIdx;
+ unsigned OpIdxGroup = ~0u;
+ unsigned NumOps;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e;
+ i += NumOps) {
+ const MachineOperand &FlagMO = getOperand(i);
+ assert(FlagMO.isImm() && "Invalid tied operand on inline asm");
+ unsigned CurGroup = GroupIdx.size();
+ GroupIdx.push_back(i);
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm());
+ // OpIdx belongs to this operand group.
+ if (OpIdx > i && OpIdx < i + NumOps)
+ OpIdxGroup = CurGroup;
+ unsigned TiedGroup;
+ if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup))
+ continue;
+ // Operands in this group are tied to operands in TiedGroup which must be
+ // earlier. Find the number of operands between the two groups.
+ unsigned Delta = i - GroupIdx[TiedGroup];
+
+ // OpIdx is a use tied to TiedGroup.
+ if (OpIdxGroup == CurGroup)
+ return OpIdx - Delta;
+
+ // OpIdx is a def tied to this use group.
+ if (OpIdxGroup == TiedGroup)
+ return OpIdx + Delta;
+ }
+ llvm_unreachable("Invalid tied operand on inline asm");
+}
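+
+// Editor's note: illustrative sketch, not part of the original sources. For
+// the INLINEASM case handled above, a tied use group mirrors the layout of the
+// def group it is tied to, so the partner operand sits at a fixed distance
+// (Delta) between the two group descriptors:
+//
+//   ... [D: regdef], %vreg0, ... [U: reguse tiedto:$D], %vreg0, ...
+//   Delta = index(U) - index(D)
+//   tied def of a use at operand k  ->  k - Delta
+//   tied use of a def at operand k  ->  k + Delta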
+
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (MachineOperand &MO : operands()) {
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
+
+/// isSafeToMove - Return true if it is safe to move this instruction. If
+/// SawStore is set to true, it means that there is a store (or call) between
+/// the instruction's location and its intended destination.
+bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
+ // Ignore stuff that we obviously can't move.
+ //
+ // Treat volatile loads as stores. This is not strictly necessary for
+ // volatiles, but it is required for atomic loads. It is not allowed to move
+ // a load across an atomic load with Ordering > Monotonic.
+ if (mayStore() || isCall() ||
+ (mayLoad() && hasOrderedMemoryRef())) {
+ SawStore = true;
+ return false;
+ }
+
+ if (isPosition() || isDebugValue() || isTerminator() ||
+ hasUnmodeledSideEffects())
+ return false;
+
+ // See if this instruction does a load. If so, we have to guarantee that the
+ // loaded value doesn't change between the load and its intended
+ // destination. The check for isInvariantLoad gives the target the chance to
+ // classify the load as always returning a constant, e.g. a constant pool
+ // load.
+ if (mayLoad() && !isInvariantLoad(AA))
+ // Otherwise, this is a real load. If there is a store between the load and
+ // end of block, we can't move it.
+ return !SawStore;
+
+ return true;
+}
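+
+// Editor's note: illustrative sketch, not part of the original sources. A
+// typical caller (e.g. a sinking pass) threads one SawStore flag through a
+// scan of the block, so an earlier store or call blocks later loads (MBB and
+// AA are assumed to be in scope):
+//
+//   bool SawStore = false;
+//   for (MachineInstr &MI : MBB)
+//     if (MI.isSafeToMove(AA, SawStore))
+//       ; // MI is a candidate for being moved within or out of this block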
+
+/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
+/// or volatile memory reference, or if the information describing the memory
+/// reference is not available. Return false if it is known to have no ordered
+/// memory references.
+bool MachineInstr::hasOrderedMemoryRef() const {
+ // An instruction known never to access memory won't have a volatile access.
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
+ !hasUnmodeledSideEffects())
+ return false;
+
+ // Otherwise, if the instruction has no memory reference information,
+ // conservatively assume it wasn't preserved.
+ if (memoperands_empty())
+ return true;
+
+ // Check if any of our memory operands are ordered.
+ return any_of(memoperands(), [](const MachineMemOperand *MMO) {
+ return !MMO->isUnordered();
+ });
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function. For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change. This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+ // If the instruction doesn't load at all, it isn't an invariant load.
+ if (!mayLoad())
+ return false;
+
+ // If the instruction has lost its memoperands, conservatively assume that
+ // it may not be an invariant load.
+ if (memoperands_empty())
+ return false;
+
+ const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+ for (MachineMemOperand *MMO : memoperands()) {
+ if (MMO->isVolatile()) return false;
+ if (MMO->isStore()) return false;
+ if (MMO->isInvariant()) continue;
+
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
+ if (PSV->isConstant(MFI))
+ continue;
+
+ if (const Value *V = MMO->getValue()) {
+ // If we have an AliasAnalysis, ask it whether the memory is constant.
+ if (AA &&
+ AA->pointsToConstantMemory(
+ MemoryLocation(V, MMO->getSize(), MMO->getAAInfo())))
+ continue;
+ }
+
+ // Otherwise assume conservatively.
+ return false;
+ }
+
+ // Everything checks out.
+ return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (!isPHI())
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
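+
+// Editor's note: illustrative sketch, not part of the original sources. A
+// PHI's operand list is the def followed by (incoming-register, pred-MBB)
+// pairs, which is why the loop above reads operands 1, 3, 5, ...:
+//
+//   %vreg2<def> = PHI %vreg7, <BB#1>, %vreg7, <BB#3>   // returns %vreg7
+//   %vreg2<def> = PHI %vreg7, <BB#1>, %vreg9, <BB#3>   // returns 0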
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (hasProperty(MCID::UnmodeledSideEffects))
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
+bool MachineInstr::isLoadFoldBarrier() const {
+ return mayStore() || isCall() || hasUnmodeledSideEffects();
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+ for (const MachineOperand &MO : operands()) {
+ if (!MO.isReg() || MO.isUse())
+ continue;
+ if (!MO.isDead())
+ return false;
+ }
+ return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(MachineFunction &MF,
+ const MachineInstr &MI) {
+ for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
+ addOperand(MF, MO);
+ }
+}
+
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << " " << *this;
+#endif
+}
+
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+ const Module *M = nullptr;
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ M = MF->getFunction()->getParent();
+
+ ModuleSlotTracker MST(M);
+ print(OS, MST, SkipOpers);
+}
+
+void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
+ bool SkipOpers) const {
+ // We can be a bit tidier if we know the MachineFunction.
+ const MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (MF) {
+ MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ }
+ }
+
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
+ // Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
+ for (; StartOp < e && getOperand(StartOp).isReg() &&
+ getOperand(StartOp).isDef() &&
+ !getOperand(StartOp).isImplicit();
+ ++StartOp) {
+ if (StartOp != 0) OS << ", ";
+ getOperand(StartOp).print(OS, MST, TRI);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtRegs.push_back(Reg);
+ unsigned Size;
+ if (MRI && (Size = MRI->getSize(Reg)))
+ OS << '(' << Size << ')';
+ }
+ }
+
+ if (StartOp != 0)
+ OS << " = ";
+
+ // Print the opcode name.
+ if (TII)
+ OS << TII->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
+
+ if (getType()) {
+ OS << ' ';
+ getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
+ OS << ' ';
+ }
+
+ if (SkipOpers)
+ return;
+
+ // Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
+
+ if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI);
+
+ // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_MayLoad)
+ OS << " [mayload]";
+ if (ExtraInfo & InlineAsm::Extra_MayStore)
+ OS << " [maystore]";
+ if (ExtraInfo & InlineAsm::Extra_IsConvergent)
+ OS << " [isconvergent]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+ if (getInlineAsmDialect() == InlineAsm::AD_ATT)
+ OS << " [attdialect]";
+ if (getInlineAsmDialect() == InlineAsm::AD_Intel)
+ OS << " [inteldialect]";
+
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+ for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MRI && isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MRI->use_empty(Reg)) {
+ bool HasAliasLive = false;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ unsigned AliasReg = *AI;
+ if (!MRI->use_empty(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ";
+ if (i < getDesc().NumOperands) {
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
+ OS << "pred:";
+ if (MCOI.isOptionalDef())
+ OS << "opt:";
+ }
+ if (isDebugValue() && MO.isMetadata()) {
+ // Pretty print DBG_VALUE instructions.
+ auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
+ if (DIV && !DIV->getName().empty())
+ OS << "!\"" << DIV->getName() << '\"';
+ else
+ MO.print(OS, MST, TRI);
+ } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TRI->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag); break;
+ }
+
+ unsigned RCID = 0;
+ if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
+ InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
+ } else
+ OS << ":RC" << RCID;
+ }
+
+ if (InlineAsm::isMemKind(Flag)) {
+ unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+ switch (MCID) {
+ case InlineAsm::Constraint_es: OS << ":es"; break;
+ case InlineAsm::Constraint_i: OS << ":i"; break;
+ case InlineAsm::Constraint_m: OS << ":m"; break;
+ case InlineAsm::Constraint_o: OS << ":o"; break;
+ case InlineAsm::Constraint_v: OS << ":v"; break;
+ case InlineAsm::Constraint_Q: OS << ":Q"; break;
+ case InlineAsm::Constraint_R: OS << ":R"; break;
+ case InlineAsm::Constraint_S: OS << ":S"; break;
+ case InlineAsm::Constraint_T: OS << ":T"; break;
+ case InlineAsm::Constraint_Um: OS << ":Um"; break;
+ case InlineAsm::Constraint_Un: OS << ":Un"; break;
+ case InlineAsm::Constraint_Uq: OS << ":Uq"; break;
+ case InlineAsm::Constraint_Us: OS << ":Us"; break;
+ case InlineAsm::Constraint_Ut: OS << ":Ut"; break;
+ case InlineAsm::Constraint_Uv: OS << ":Uv"; break;
+ case InlineAsm::Constraint_Uy: OS << ":Uy"; break;
+ case InlineAsm::Constraint_X: OS << ":X"; break;
+ case InlineAsm::Constraint_Z: OS << ":Z"; break;
+ case InlineAsm::Constraint_ZC: OS << ":ZC"; break;
+ case InlineAsm::Constraint_Zy: OS << ":Zy"; break;
+ default: OS << ":?"; break;
+ }
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " tiedto:$" << TiedTo;
+
+ OS << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
+ } else
+ MO.print(OS, MST, TRI);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (!FirstOp) OS << ",";
+ OS << " ...";
+ }
+
+ bool HaveSemi = false;
+ const unsigned PrintableFlags = FrameSetup | FrameDestroy;
+ if (Flags & PrintableFlags) {
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
+ OS << " flags: ";
+
+ if (Flags & FrameSetup)
+ OS << "FrameSetup";
+
+ if (Flags & FrameDestroy)
+ OS << "FrameDestroy";
+ }
+
+ if (!memoperands_empty()) {
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
+
+ OS << " mem:";
+ for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
+ i != e; ++i) {
+ (*i)->print(OS, MST);
+ if (std::next(i) != e)
+ OS << " ";
+ }
+ }
+
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) {
+ OS << ";";
+ HaveSemi = true;
+ }
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]);
+ if (!RC)
+ continue;
+ // Generic virtual registers do not have register classes.
+ if (RC.is<const RegisterBank *>())
+ OS << " " << RC.get<const RegisterBank *>()->getName();
+ else
+ OS << " "
+ << TRI->getRegClassName(RC.get<const TargetRegisterClass *>());
+ OS << ':' << PrintReg(VirtRegs[i]);
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << PrintReg(VirtRegs[j]);
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
+ // Print debug location information.
+ if (isDebugValue() && getOperand(e - 2).isMetadata()) {
+ if (!HaveSemi)
+ OS << ";";
+ auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata());
+ OS << " line no:" << DV->getLine();
+ if (auto *InlinedAt = debugLoc->getInlinedAt()) {
+ DebugLoc InlinedAtDL(InlinedAt);
+ if (InlinedAtDL && MF) {
+ OS << " inlined @[ ";
+ InlinedAtDL.print(OS);
+ OS << " ]";
+ }
+ }
+ if (isIndirectDebugValue())
+ OS << " indirect";
+ } else if (debugLoc && MF) {
+ if (!HaveSemi)
+ OS << ";";
+ OS << " dbg:";
+ debugLoc.print(OS);
+ }
+
+ OS << '\n';
+}
+
+bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ continue;
+
+ // DEBUG_VALUE nodes do not contribute to code generation and should
+ // always be ignored. Failure to do so may result in trying to modify
+ // KILL flags on DEBUG_VALUE nodes.
+ if (MO.isDebug())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (Reg == IncomingReg) {
+ if (!Found) {
+ if (MO.isKill())
+ // The register is already marked kill.
+ return true;
+ if (isPhysReg && isRegTiedToDefOperand(i))
+ // Two-address uses of physregs must not be marked kill.
+ return true;
+ MO.setIsKill();
+ Found = true;
+ }
+ } else if (hasAliases && MO.isKill() &&
+ TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // A super-register kill already exists.
+ if (RegInfo->isSuperRegister(IncomingReg, Reg))
+ return true;
+ if (RegInfo->isSubRegister(IncomingReg, Reg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded kill operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsKill(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is killed. Add a
+ // new implicit operand if required.
+ if (!Found && AddIfNotFound) {
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ true /*IsKill*/));
+ return true;
+ }
+ return Found;
+}
+
+void MachineInstr::clearRegisterKills(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ RegInfo = nullptr;
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned OpReg = MO.getReg();
+ if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg)
+ MO.setIsKill(false);
+ }
+}
+
+bool MachineInstr::addRegisterDead(unsigned Reg,
+ const TargetRegisterInfo *RegInfo,
+ bool AddIfNotFound) {
+ bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg);
+ bool hasAliases = isPhysReg &&
+ MCRegAliasIterator(Reg, RegInfo, false).isValid();
+ bool Found = false;
+ SmallVector<unsigned,4> DeadOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+
+ if (MOReg == Reg) {
+ MO.setIsDead();
+ Found = true;
+ } else if (hasAliases && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ // There exists a super-register that's marked dead.
+ if (RegInfo->isSuperRegister(Reg, MOReg))
+ return true;
+ if (RegInfo->isSubRegister(Reg, MOReg))
+ DeadOps.push_back(i);
+ }
+ }
+
+ // Trim unneeded dead operands.
+ while (!DeadOps.empty()) {
+ unsigned OpIdx = DeadOps.back();
+ if (getOperand(OpIdx).isImplicit())
+ RemoveOperand(OpIdx);
+ else
+ getOperand(OpIdx).setIsDead(false);
+ DeadOps.pop_back();
+ }
+
+ // If not found, this means an alias of one of the operands is dead. Add a
+ // new implicit operand if required.
+ if (Found || !AddIfNotFound)
+ return Found;
+
+ addOperand(MachineOperand::CreateReg(Reg,
+ true /*IsDef*/,
+ true /*IsImp*/,
+ false /*IsKill*/,
+ true /*IsDead*/));
+ return true;
+}
+
+void MachineInstr::clearRegisterDeads(unsigned Reg) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
+ continue;
+ MO.setIsDead(false);
+ }
+}
+
+void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
+ for (MachineOperand &MO : operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
+ continue;
+ MO.setIsUndef(IsUndef);
+ }
+}
+
+void MachineInstr::addRegisterDefined(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (const MachineOperand &MO : operands()) {
+ if (MO.isReg() && MO.getReg() == Reg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(Reg,
+ true /*IsDef*/,
+ true /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
+ for (MachineOperand &MO : operands()) {
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // If there are no uses, including partial uses, the def is dead.
+ if (std::none_of(UsedRegs.begin(), UsedRegs.end(),
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ MO.setIsDead();
+ }
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ addRegisterDefined(*I, &TRI);
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+ // Build up a buffer of hash code components.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.isDef() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ HashComponents.push_back(hash_value(MO));
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
+}
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = nullptr;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI =
+ mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+ const MCInstrDesc &MCID, bool IsIndirect,
+ unsigned Reg, unsigned Offset,
+ const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ if (IsIndirect)
+ return BuildMI(MF, DL, MCID)
+ .addReg(Reg, RegState::Debug)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+ else {
+ assert(Offset == 0 && "A direct address cannot have an offset.");
+ return BuildMI(MF, DL, MCID)
+ .addReg(Reg, RegState::Debug)
+ .addReg(0U, RegState::Debug)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+ }
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, const MCInstrDesc &MCID,
+ bool IsIndirect, unsigned Reg,
+ unsigned Offset, const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ MachineFunction &MF = *BB.getParent();
+ MachineInstr *MI =
+ BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MF, MI);
+}
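+
+// Editor's note: illustrative sketch, not part of the original sources. A
+// typical call site for the DBG_VALUE builders above, where MBB is a
+// MachineBasicBlock&, TII comes from the subtarget, and DL, Reg, Var and Expr
+// are assumed to satisfy the asserts:
+//
+//   // Direct form: the variable's value is in Reg, so Offset must be 0.
+//   BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::DBG_VALUE),
+//           /*IsIndirect=*/false, Reg, /*Offset=*/0, Var, Expr);
+//   // Indirect form: the variable lives in memory at Reg + Offset.
+//   BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::DBG_VALUE),
+//           /*IsIndirect=*/true, Reg, Offset, Var, Expr);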
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 000000000000..e4686b3c5c4e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,344 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr)
+ : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ std::function<bool(const Function &)> PredicateFtor;
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ return false;
+
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isBundledWithPred()) {
+ MII->unbundleFromPred();
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *
+llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) {
+ return new UnpackMachineBundles(std::move(Ftor));
+}
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle; it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+ MIBundleBuilder Bundle(MBB, FirstMI, LastMI);
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE));
+ Bundle.prepend(MIB);
+
+ SmallVector<unsigned, 32> LocalDefs;
+ SmallSet<unsigned, 32> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 16> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ for (; FirstMI != LastMI; ++FirstMI) {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg).second) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External def is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg).second) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ unsigned SubReg = *SubRegs;
+ if (LocalDefSet.insert(SubReg).second)
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ Defs.clear();
+ }
+
+ SmallSet<unsigned, 32> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg).second) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
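+
+// Editor's note: illustrative sketch, not part of the original sources. A
+// target pass that wants a sequence of instructions issued as a unit can
+// bundle it and let finalizeBundle compute the BUNDLE header's externally
+// visible defs and uses (MBB is a MachineBasicBlock&, First and Last are the
+// first and last MachineInstr* of the sequence, inclusive):
+//
+//   finalizeBundle(MBB, First->getIterator(), std::next(Last->getIterator()));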
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = std::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, std::prev(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//===----------------------------------------------------------------------===//
+
+MachineOperandIteratorBase::VirtRegInfo
+MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
+ VirtRegInfo RI = { false, false, false };
+ for(; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
+
+MachineOperandIteratorBase::PhysRegInfo
+MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ bool AllDefsDead = true;
+ PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
+
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "analyzePhysReg not given a physical register!");
+ for (; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
+ PRI.Clobbered = true;
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+
+ unsigned MOReg = MO.getReg();
+ if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ continue;
+
+ if (!TRI->regsOverlap(MOReg, Reg))
+ continue;
+
+ bool Covered = TRI->isSuperRegisterEq(Reg, MOReg);
+ if (MO.readsReg()) {
+ PRI.Read = true;
+ if (Covered) {
+ PRI.FullyRead = true;
+ if (MO.isKill())
+ PRI.Killed = true;
+ }
+ } else if (MO.isDef()) {
+ PRI.Defined = true;
+ if (Covered)
+ PRI.FullyDefined = true;
+ if (!MO.isDead())
+ AllDefsDead = false;
+ }
+ }
+
+ if (AllDefsDead) {
+ if (PRI.FullyDefined || PRI.Clobbered)
+ PRI.DeadDef = true;
+ else if (PRI.Defined)
+ PRI.PartialDeadDef = true;
+ }
+
+ return PRI;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 000000000000..119751b17f56
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1388 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-licm"
+
+static cl::opt<bool>
+AvoidSpeculation("avoid-speculation",
+ cl::desc("MachineLICM should avoid speculation"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+HoistCheapInsts("hoist-cheap-insts",
+ cl::desc("MachineLICM should hoist even cheap instructions"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+ cl::desc("MachineLICM should sink instructions into "
+ "loops to avoid register spills"),
+ cl::init(false), cl::Hidden);
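+
+// Editor's note: illustrative, not part of the original sources. These hidden
+// cl::opt knobs can be toggled on the llc command line of a compiler built
+// from this tree, e.g.:
+//
+//   llc -hoist-cheap-insts -sink-insts-to-avoid-spills foo.ll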
+
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+ "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+ class MachineLICM : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetLoweringBase *TLI;
+ const TargetRegisterInfo *TRI;
+ const MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ TargetSchedModel SchedModel;
+ bool PreRegAlloc;
+
+ // Various analyses that we use...
+ AliasAnalysis *AA; // Alias analysis info.
+ MachineLoopInfo *MLI; // Current MachineLoopInfo
+ MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+
+ // State that is updated as we process loops
+ bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+ ExitBlocks.end();
+ }
+
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register pressure set. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+ // For each opcode, keep a list of potential CSE instructions.
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+ enum {
+ SpeculateFalse = 0,
+ SpeculateTrue = 1,
+ SpeculateUnknown = 2
+ };
+
+ // If an MBB does not dominate the loop's exiting blocks, then it may not
+ // be safe to hoist loads from this block.
+ // Tri-state: 0 - false, 1 - true, 2 - unknown
+ unsigned SpeculationState;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineLICM() :
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit MachineLICM(bool PreRA) :
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void releaseMemory() override {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ CSEMap.clear();
+ }
+
+ private:
+ /// Keep track of information about hoisting candidates.
+ struct CandidateInfo {
+ MachineInstr *MI;
+ unsigned Def;
+ int FI;
+ CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+ : MI(mi), Def(def), FI(fi) {}
+ };
+
+ void HoistRegionPostRA();
+
+ void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+ void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
+ SmallVectorImpl<CandidateInfo> &Candidates);
+
+ void AddToLiveIns(unsigned Reg);
+
+ bool IsLICMCandidate(MachineInstr &I);
+
+ bool IsLoopInvariantInst(MachineInstr &I);
+
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
+
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
+ bool Cheap);
+
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+ bool IsProfitableToHoist(MachineInstr &MI);
+
+ bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+ void ExitScopeIfDone(
+ MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
+
+ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+
+ void SinkIntoLoop();
+
+ void InitRegPressure(MachineBasicBlock *BB);
+
+ DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
+ bool ConsiderSeen,
+ bool ConsiderUnseenAsDef);
+
+ void UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef = false);
+
+ MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+ const MachineInstr *
+ LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr *> &PrevMIs);
+
+ bool EliminateCSE(
+ MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
+
+ bool MayCSE(MachineInstr *MI);
+
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+ void InitCSEMap(MachineBasicBlock *BB);
+
+ MachineBasicBlock *getCurPreheader();
+ };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+
+/// Test if the given loop is the outer-most loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
+ for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+ if (L->getLoopPredecessor())
+ return false;
+ // None of them did, so this is the outermost with a unique predecessor.
+ return true;
+}
+
+bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ Changed = FirstInLoop = false;
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ TLI = ST.getTargetLowering();
+ TRI = ST.getRegisterInfo();
+ MFI = MF.getFrameInfo();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
+
+ PreRegAlloc = MRI->isSSA();
+
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getName() << " ********\n");
+
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRPS = TRI->getNumRegPressureSets();
+ RegPressure.resize(NumRPS);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRPS);
+ for (unsigned i = 0, e = NumRPS; i != e; ++i)
+ RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
+ }
+
+ // Get our Loop information...
+ MLI = &getAnalysis<MachineLoopInfo>();
+ DT = &getAnalysis<MachineDominatorTree>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = nullptr;
+ ExitBlocks.clear();
+
+ // If this is done before regalloc, only visit outer-most preheader-sporting
+ // loops.
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
+ continue;
+ }
+
+ CurLoop->getExitBlocks(ExitBlocks);
+
+ if (!PreRegAlloc)
+ HoistRegionPostRA();
+ else {
+ // CSEMap is initialized for loop header when the first instruction is
+ // being hoisted.
+ MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
+ HoistOutOfLoop(N);
+ CSEMap.clear();
+
+ if (SinkInstsToAvoidSpills)
+ SinkIntoLoop();
+ }
+ }
+
+ return Changed;
+}
+
+/// Return true if instruction stores to the specified frame.
+static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
+ // If we lost memory operands, conservatively assume that the instruction
+ // writes to all slots.
+ if (MI->memoperands_empty())
+ return true;
+ for (const MachineMemOperand *MemOp : MI->memoperands()) {
+ if (!MemOp->isStore() || !MemOp->getPseudoValue())
+ continue;
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
+ if (Value->getFrameIndex() == FI)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Examine the instruction as a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
+ SmallSet<int, 32> &StoredFIs,
+ SmallVectorImpl<CandidateInfo> &Candidates) {
+ bool RuledOut = false;
+ bool HasNonInvariantUse = false;
+ unsigned Def = 0;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isFI()) {
+ // Remember if the instruction stores to the frame index.
+ int FI = MO.getIndex();
+ if (!StoredFIs.count(FI) &&
+ MFI->isSpillSlotObjectIndex(FI) &&
+ InstructionStoresToFI(MI, FI))
+ StoredFIs.insert(FI);
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Not expecting virtual register!");
+
+ if (!MO.isDef()) {
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
+ // If it's using a non-loop-invariant register, then it's obviously not
+ // safe to hoist.
+ HasNonInvariantUse = true;
+ continue;
+ }
+
+ if (MO.isImplicit()) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ PhysRegClobbers.set(*AI);
+ if (!MO.isDead())
+ // Non-dead implicit def? This cannot be hoisted.
+ RuledOut = true;
+ // No need to check if a dead implicit def is also defined by
+ // another instruction.
+ continue;
+ }
+
+ // FIXME: For now, avoid instructions with multiple defs, unless
+ // it's a dead implicit def.
+ if (Def)
+ RuledOut = true;
+ else
+ Def = Reg;
+
+ // If we have already seen another instruction that defines the same
+ // register, then this is not safe. Two defs are indicated by setting a
+ // PhysRegClobbers bit.
+ for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ PhysRegDefs.set(*AS);
+ }
+ if (PhysRegClobbers.test(Reg))
+ // The register defined by MI is also defined by another instruction in
+ // the loop, so MI cannot be a LICM candidate.
+ RuledOut = true;
+ }
+
+ // Only consider reloads for now and remats which do not have register
+ // operands. FIXME: Consider unfold load folding instructions.
+ if (Def && !RuledOut) {
+ int FI = INT_MIN;
+ if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+ (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+ Candidates.push_back(CandidateInfo(MI, Def, FI));
+ }
+}
+
+/// Walk the specified region of the CFG and hoist loop invariants out to the
+/// preheader.
+void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ unsigned NumRegs = TRI->getNumRegs();
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
+
+ SmallVector<CandidateInfo, 32> Candidates;
+ SmallSet<int, 32> StoredFIs;
+
+ // Walk the entire region, count number of defs for each register, and
+ // collect potential LICM candidates.
+ const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
+ for (MachineBasicBlock *BB : Blocks) {
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isEHPad()) continue;
+
+ // Conservatively treat live-ins as external defs.
+ // FIXME: That means a reload that is reused in successor block(s) will not
+ // be LICM'ed.
+ for (const auto &LI : BB->liveins()) {
+ for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
+ PhysRegDefs.set(*AI);
+ }
+
+ SpeculationState = SpeculateUnknown;
+ for (MachineInstr &MI : *BB)
+ ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (const MachineOperand &MO : TI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ TermRegs.set(*AI);
+ }
+ }
+
+ // Now evaluate whether the potential candidates qualify.
+ // 1. Check if the candidate defined register is defined by another
+ // instruction in the loop.
+ // 2. If the candidate is a load from stack slot (always true for now),
+ // check if the slot is stored anywhere in the loop.
+ // 3. Make sure the candidate's def does not clobber
+ //    registers read by the terminator, and that the def itself is not
+ //    clobbered by the terminator.
+ for (CandidateInfo &Candidate : Candidates) {
+ if (Candidate.FI != INT_MIN &&
+ StoredFIs.count(Candidate.FI))
+ continue;
+
+ unsigned Def = Candidate.Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
+ bool Safe = true;
+ MachineInstr *MI = Candidate.MI;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
+ // If it's using a non-loop-invariant register, then it's obviously
+ // not safe to hoist.
+ Safe = false;
+ break;
+ }
+ }
+ if (Safe)
+ HoistPostRA(MI, Candidate.Def);
+ }
+ }
+}
+
+/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
+/// sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) {
+ const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
+ for (MachineBasicBlock *BB : Blocks) {
+ if (!BB->isLiveIn(Reg))
+ BB->addLiveIn(Reg);
+ for (MachineInstr &MI : *BB) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
+ MO.setIsKill(false);
+ }
+ }
+ }
+}
+
+/// When an instruction that only uses loop-invariant operands is found to be
+/// safe to hoist, this function is called to do the dirty work.
+void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+
+ // Now move the instructions to the predecessor, inserting it before any
+ // terminator instructions.
+ DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+ << MI->getParent()->getNumber() << ": " << *MI);
+
+ // Splice the instruction to the preheader.
+ MachineBasicBlock *MBB = MI->getParent();
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
+
+ // Add register to livein list to all the BBs in the current loop since a
+ // loop invariant must be kept live throughout the whole loop. This is
+ // important to ensure later passes do not scavenge the def register.
+ AddToLiveIns(Def);
+
+ ++NumPostRAHoisted;
+ Changed = true;
+}
+
+/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
+/// may not be safe to hoist.
+bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
+ if (SpeculationState != SpeculateUnknown)
+ return SpeculationState == SpeculateFalse;
+
+ if (BB != CurLoop->getHeader()) {
+ // Check loop exiting blocks.
+ SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
+ CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
+ for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
+ if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
+ SpeculationState = SpeculateTrue;
+ return false;
+ }
+ }
+
+ SpeculationState = SpeculateFalse;
+ return true;
+}
+
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
+
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
+ BackTrace.pop_back();
+}
+
+/// Destroy the scope for the MBB that corresponds to the given dominator tree
+/// node if it's a leaf or all of its children are done. Walk up the dominator
+/// tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
+ return;
+
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+  // Now traverse upwards to pop ancestors whose children are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+/// Walk the specified loop in the CFG (defined by all blocks dominated by the
+/// specified header block, and that are in the current loop) in depth first
+/// order w.r.t the DominatorTree. This allows us to visit definitions before
+/// uses, allowing us to hoist a loop body in one pass without iteration.
+///
+void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ while (!WorkList.empty()) {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isEHPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (int i = (int)NumChildren-1; i >= 0; --i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ }
+
+ if (Scopes.size() == 0)
+ return;
+
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+
+ // Now perform LICM.
+ for (MachineDomTreeNode *Node : Scopes) {
+ MachineBasicBlock *MBB = Node->getBlock();
+
+ EnterScope(MBB);
+
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
+}
+
+/// Sink instructions into loops if profitable. This especially tries to prevent
+/// register spills caused by register pressure if there is little to no
+/// overhead moving instructions into loops.
+void MachineLICM::SinkIntoLoop() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ SmallVector<MachineInstr *, 8> Candidates;
+ for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+ I != Preheader->instr_end(); ++I) {
+    // We need to ensure that we can safely move this instruction into the loop.
+    // As such, it must not have side-effects, e.g. the way a call does.
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
+ Candidates.push_back(&*I);
+ }
+
+ for (MachineInstr *I : Candidates) {
+ const MachineOperand &MO = I->getOperand(0);
+ if (!MO.isDef() || !MO.isReg() || !MO.getReg())
+ continue;
+ if (!MRI->hasOneDef(MO.getReg()))
+ continue;
+ bool CanSink = true;
+ MachineBasicBlock *B = nullptr;
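+    // Walk all uses of the defined register: sinking is only attempted if
+    // every use is a copy, and B accumulates the nearest common dominator of
+    // the blocks containing those uses.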
+ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ // FIXME: Come up with a proper cost model that estimates whether sinking
+ // the instruction (and thus possibly executing it on every loop
+ // iteration) is more expensive than a register.
+      // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) {
+ CanSink = false;
+ break;
+ }
+ if (!B) {
+ B = MI.getParent();
+ continue;
+ }
+ B = DT->findNearestCommonDominator(B, MI.getParent());
+ if (!B) {
+ CanSink = false;
+ break;
+ }
+ }
+ if (!CanSink || !B || B == Preheader)
+ continue;
+ B->splice(B->getFirstNonPHI(), Preheader, I);
+ }
+}
+
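+/// Return true if this use operand reads the last use of the register value,
+/// i.e. it is marked as a kill or it is the register's only non-debug use.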
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// Find all virtual register references that are liveout of the preheader to
+/// initialize the starting "register pressure". Note this does not count live
+/// through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (const MachineInstr &MI : *BB)
+ UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
+}
+
+/// Update estimate of register pressure after the specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
+ bool ConsiderUnseenAsDef) {
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
+ for (const auto &RPIdAndCost : Cost) {
+ unsigned Class = RPIdAndCost.first;
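+    // A negative cost (a last use) can only lower the pressure estimate down
+    // to zero; clamp rather than letting the unsigned counter wrap around.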
+ if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
+ RegPressure[Class] = 0;
+ else
+ RegPressure[Class] += RPIdAndCost.second;
+ }
+}
+
+/// Calculate the additional register pressure that the registers used in MI
+/// cause.
+///
+/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
+/// figure out which usages are live-ins.
+/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
+DenseMap<unsigned, int>
+MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
+ bool ConsiderUnseenAsDef) {
+ DenseMap<unsigned, int> Cost;
+ if (MI->isImplicitDef())
+ return Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // FIXME: It seems bad to use RegSeen only for some of these calculations.
+ bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+ RegClassWeight W = TRI->getRegClassWeight(RC);
+ int RCCost = 0;
+ if (MO.isDef())
+ RCCost = W.RegWeight;
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill && ConsiderUnseenAsDef)
+ // Haven't seen this, it must be a livein.
+ RCCost = W.RegWeight;
+ else if (!isNew && isKill)
+ RCCost = -W.RegWeight;
+ }
+ if (RCCost == 0)
+ continue;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ for (; *PS != -1; ++PS) {
+ if (Cost.find(*PS) == Cost.end())
+ Cost[*PS] = RCCost;
+ else
+ Cost[*PS] += RCCost;
+ }
+ }
+ return Cost;
+}
+
+/// Return true if this machine instruction loads from the global offset table
+/// or the constant pool.
+static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert (MI.mayLoad() && "Expected MI that loads!");
+
+ // If we lost memory operands, conservatively assume that the instruction
+  // reads from everything.
+ if (MI.memoperands_empty())
+ return true;
+
+ for (MachineMemOperand *MemOp : MI.memoperands())
+ if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
+ if (PSV->isGOT() || PSV->isConstantPool())
+ return true;
+
+ return false;
+}
+
+/// Returns true if the instruction may be a suitable candidate for LICM.
+/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(AA, DontMoveAcrossStore))
+ return false;
+
+  // If it is a load then check if it is guaranteed to execute by making sure
+  // that it dominates all exiting blocks. If it doesn't, then there is a path
+  // out of the loop which does not execute this load, so we can't hoist it.
+  // Loads from constant memory are not safe to speculate all the time, for
+  // example an indexed load from a jump table.
+ // Stores and side effects are already checked by isSafeToMove.
+ if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent()))
+ return false;
+
+ return true;
+}
+
+/// Returns true if the instruction is loop invariant.
+/// I.e., all virtual register operands are defined outside of the loop,
+/// physical registers aren't accessed explicitly, and there are no side
+/// effects that aren't captured by the operands or other flags.
+///
+bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
+ if (!IsLICMCandidate(I))
+ return false;
+
+ // The instruction is loop invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // Don't hoist an instruction that uses or defines a physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
+
+
+/// Return true if the specified instruction is used by a phi node and hoisting
+/// it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
+ do {
+ MI = Work.pop_back_val();
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+ // A PHI may cause a copy to be inserted.
+ if (UseMI.isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(&UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI.getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI.isCopy() && CurLoop->contains(&UseMI))
+ Work.push_back(&UseMI);
+ }
+ }
+ } while (!Work.empty());
+ return false;
+}
+
+/// Compute operand latency between a def of 'Reg' and a use in the current
+/// loop, and return true if the target considers it high.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+ if (UseMI.isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI.getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+    // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// Return true if the instruction is marked "cheap" or the operand latency
+/// between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
+ return true;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
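+  // Consider the instruction cheap only if every virtual register it defines
+  // has a low definition latency.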
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(SchedModel, MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// Visit BBs from header to current BB, and check whether hoisting an
+/// instruction with the given set of register pressure costs can cause high
+/// register pressure.
+bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
+ bool CheapInstr) {
+ for (const auto &RPIdAndCost : Cost) {
+ if (RPIdAndCost.second <= 0)
+ continue;
+
+ unsigned Class = RPIdAndCost.first;
+ int Limit = RegLimit[Class];
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr && !HoistCheapInsts)
+ return true;
+
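+    // Check the recorded pressure of every block on the path from the loop
+    // header to the current block against the target's limit for this
+    // pressure set.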
+ for (const auto &RP : BackTrace)
+ if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
+ return true;
+ }
+
+ return false;
+}
+
+/// Traverse the back trace from header to the current block and update their
+/// register pressures to reflect the effect of hoisting MI from the current
+/// block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
+
+ // Update register pressure of blocks from loop header to current block.
+ for (auto &RP : BackTrace)
+ for (const auto &RPIdAndCost : Cost)
+ RP[RPIdAndCost.first] += RPIdAndCost.second;
+}
+
+/// Return true if it is potentially profitable to hoist the given loop
+/// invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+ if (MI.isImplicitDef())
+ return true;
+
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
+
+ // Rematerializable instructions should always be hoisted since the register
+ // allocator can just pull them down again when needed.
+ if (TII->isTriviallyReMaterializable(MI, AA))
+ return true;
+
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
+ }
+ }
+
+  // Estimate register pressure to determine whether to LICM the instruction.
+  // In a low register pressure situation, we can be more aggressive about
+  // hoisting. We also favor hoisting long latency instructions even in a
+  // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
+ /*ConsiderUnseenAsDef=*/false);
+
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
+
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
+
+  // Do not "speculate" in a high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
+ }
+
+  // In a high register pressure situation, only hoist if the instruction is
+  // going to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) {
+ DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
+ }
+
+ return true;
+}
+
+/// Unfold a load from the given MachineInstr if the load itself could be
+/// hoisted. Return the unfolded and hoistable load, or null if the load
+/// couldn't be unfolded or if it wouldn't be hoistable.
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->canFoldAsLoad())
+ return nullptr;
+
+ // If not, we may be able to unfold a load and hoist that.
+ // First test whether the instruction is loading from an amenable
+ // memory location.
+ if (!MI->isInvariantLoad(AA))
+ return nullptr;
+
+ // Next determine the register class for a temporary register.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc == 0) return nullptr;
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
+ // Ok, we're unfolding. Create a temporary register and do the unfold.
+ unsigned Reg = MRI->createVirtualRegister(RC);
+
+ SmallVector<MachineInstr *, 2> NewMIs;
+ bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs);
+ (void)Success;
+ assert(Success &&
+ "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+ "succeeded!");
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
+ // If unfolding produced a load that wasn't loop-invariant or profitable to
+ // hoist, discard the new instructions and bail.
+ if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ return nullptr;
+ }
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
+ // Otherwise we successfully unfolded a load that we can hoist.
+ MI->eraseFromParent();
+ return NewMIs[0];
+}
+
+/// Initialize the CSE map with instructions that are in the current loop
+/// preheader that may become duplicates of instructions that are hoisted
+/// out of the loop.
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+ for (MachineInstr &MI : *BB)
+ CSEMap[MI.getOpcode()].push_back(&MI);
+}
+
+/// Find an instruction among PrevMIs that is a duplicate of MI.
+/// Return that instruction if it's found.
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
+ for (const MachineInstr *PrevMI : PrevMIs)
+ if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
+ return PrevMI;
+
+ return nullptr;
+}
+
+/// Given a LICM'ed instruction, look for an instruction in the preheader that
+/// computes the same value. If one is found, replace all uses of the
+/// instruction's defs with the corresponding defs of the existing instruction
+/// rather than hoisting the instruction to the preheader.
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI by their counterparts defined
+ // by Dup.
+ SmallVector<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ Defs.push_back(i);
+ }
+
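+    // Constrain the register class of each of Dup's defs to be compatible
+    // with the corresponding def of MI; if any constraint fails, restore the
+    // original classes and give up on CSE.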
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+        // Restore the old RCs if there is more than one def.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
+ }
+ }
+
+ for (unsigned Idx : Defs) {
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ }
+
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
+ }
+ return false;
+}
+
+/// Return true if the given instruction will be CSE'd if it's hoisted out of
+/// the loop.
+bool MachineLICM::MayCSE(MachineInstr *MI) {
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
+ return false;
+
+ return LookForDuplicate(MI, CI->second) != nullptr;
+}
+
+/// When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this function is called to do the dirty work.
+/// It returns true if the instruction is hoisted.
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ // First check whether we should hoist this instruction.
+ if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+ // If not, try unfolding a hoistable load.
+ MI = ExtractHoistableLoad(MI);
+ if (!MI) return false;
+ }
+
+  // Now move the instruction to the preheader, inserting it before any
+  // terminator instructions.
+ DEBUG({
+ dbgs() << "Hoisting " << *MI;
+ if (MI->getParent()->getBasicBlock())
+ dbgs() << " from BB#" << MI->getParent()->getNumber();
+ if (Preheader->getBasicBlock())
+ dbgs() << " to BB#" << Preheader->getNumber();
+ dbgs() << "\n";
+ });
+
+ // If this is the first instruction being hoisted to the preheader,
+ // initialize the CSE map with potential common expressions.
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
+
+ // Look for opportunity to CSE the hoisted instruction.
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (!EliminateCSE(MI, CI)) {
+ // Otherwise, splice the instruction to the preheader.
+ Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isDef() && !MO.isDead())
+ MRI->clearKillFlags(MO.getReg());
+
+ // Add to the CSE map.
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else
+ CSEMap[Opcode].push_back(MI);
+ }
+
+ ++NumHoisted;
+ Changed = true;
+
+ return true;
+}
+
+/// Get the preheader for the current loop, splitting a critical edge if needed.
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return nullptr;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return nullptr;
+ }
+
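+      // No preheader exists; create one by splitting the critical edge from
+      // the loop predecessor to the loop header.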
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return nullptr;
+ }
+ }
+ }
+ return CurPreheader;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 000000000000..376f78fda1c4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,84 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
+
+char MachineLoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ LI.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineBasicBlock *MachineLoop::getTopBlock() {
+ MachineBasicBlock *TopMBB = getHeader();
+ MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+ if (TopMBB->getIterator() != Begin) {
+ MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator());
+ while (contains(PriorMBB)) {
+ TopMBB = PriorMBB;
+ if (TopMBB->getIterator() == Begin)
+ break;
+ PriorMBB = &*std::prev(TopMBB->getIterator());
+ }
+ }
+ return TopMBB;
+}
+
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+ MachineBasicBlock *BotMBB = getHeader();
+ MachineFunction::iterator End = BotMBB->getParent()->end();
+ if (BotMBB->getIterator() != std::prev(End)) {
+ MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator());
+ while (contains(NextMBB)) {
+ BotMBB = NextMBB;
+      if (BotMBB == &*std::prev(End))
+ break;
+ NextMBB = &*std::next(BotMBB->getIterator());
+ }
+ }
+ return BotMBB;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineLoop::dump() const {
+ print(dbgs());
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 000000000000..244e3fbc4e8f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,463 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the pass registration stuff necessary to use DataLayout.
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr final : CallbackVH {
+ MMIAddrLabelMap *Map;
+public:
+ MMIAddrLabelMapCallbackPtr() : Map(nullptr) {}
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(MMIAddrLabelMap *map) { Map = map; }
+
+ void deleted() override;
+ void allUsesReplacedWith(Value *V2) override;
+};
+
+class MMIAddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// Symbols - The symbols for the label.
+ TinyPtrVector<MCSymbol *> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
+ /// use this so we get notified if a block is deleted or RAUWd.
+ std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+ /// whose corresponding BasicBlock got deleted. These symbols need to be
+ /// emitted at some point in the file, so AsmPrinter emits them after the
+ /// function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+ DeletedAddrLabelsNeedingEmission;
+public:
+
+ MMIAddrLabelMap(MCContext &context) : Context(context) {}
+ ~MMIAddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+ }
+
+ ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol*> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.empty()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ return Entry.Symbols;
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.emplace_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size() - 1;
+ Entry.Fn = BB->getParent();
+ Entry.Symbols.push_back(Context.createTempSymbol());
+ return Entry.Symbols;
+}
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
+/// them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
+
+ assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+
+ for (MCSymbol *Sym : Entry.Symbols) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+}
+
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.empty()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = std::move(OldEntry); // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
+
+ // Otherwise, we need to add the old symbols to the new block's set.
+ NewEntry.Symbols.insert(NewEntry.Symbols.end(), OldEntry.Symbols.begin(),
+ OldEntry.Symbols.end());
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const MCRegisterInfo &MRI,
+ const MCObjectFileInfo *MOFI)
+ : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineModuleInfo::MachineModuleInfo()
+ : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) {
+ llvm_unreachable("This MachineModuleInfo constructor should never be called, "
+ "MMI should always be explicitly constructed by "
+ "LLVMTargetMachine");
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+}
+
+bool MachineModuleInfo::doInitialization(Module &M) {
+
+ ObjFileMMI = nullptr;
+ CurCallSite = 0;
+ CallsEHReturn = false;
+ CallsUnwindInit = false;
+ HasEHFunclets = false;
+ DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
+ PersonalityTypeCache = EHPersonality::Unknown;
+ AddrLabelSymbols = nullptr;
+ TheModule = nullptr;
+
+ return false;
+}
+
+bool MachineModuleInfo::doFinalization(Module &M) {
+
+ Personalities.clear();
+
+ delete AddrLabelSymbols;
+ AddrLabelSymbols = nullptr;
+
+ Context.reset();
+
+ delete ObjFileMMI;
+ ObjFileMMI = nullptr;
+
+ return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+ // Clean up frame info.
+ FrameInstructions.clear();
+
+ // Clean up exception info.
+ LandingPads.clear();
+ PersonalityTypeCache = EHPersonality::Unknown;
+ CallSiteMap.clear();
+ TypeInfos.clear();
+ FilterIds.clear();
+ FilterEnds.clear();
+ CallsEHReturn = false;
+ CallsUnwindInit = false;
+ HasEHFunclets = false;
+ VariableDbgInfos.clear();
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well, return the whole set.
+ArrayRef<MCSymbol *>
+MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (!AddrLabelSymbols)
+ AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it. This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+ std::vector<MCSymbol*> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (!AddrLabelSymbols) return;
+ return AddrLabelSymbols->
+ takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+ (MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Context.createTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+void MachineModuleInfo::addPersonality(const Function *Personality) {
+ for (unsigned i = 0; i < Personalities.size(); ++i)
+ if (Personalities[i] == Personality)
+ return;
+ Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::
+addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad,
+ const Function *Filter,
+ const BlockAddress *RecoverBA) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Filter;
+ Handler.RecoverBA = RecoverBA;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
+ const Function *Cleanup) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Cleanup;
+ Handler.RecoverBA = nullptr;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = nullptr;
+
+    // Special case: we *should* emit LPs with null LP MBB. This indicates the
+    // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j;
+ --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
+/// indexes.
+void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 000000000000..22d519e5d88f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,44 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::anchor() {}
+void MachineModuleInfoELF::anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+ typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+ const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+ const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+ return LHSS->getName().compare(RHSS->getName());
+}
+
+MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs(
+ DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) {
+ MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+ if (!List.empty())
+ qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+
+ Map.clear();
+ return List;
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 000000000000..3ee3e40b27e2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,55 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+void MachinePassRegistryListener::anchor() { }
+
+/// setDefault - Set the default constructor by name.
+void MachinePassRegistry::setDefault(StringRef Name) {
+ MachinePassCtor Ctor = nullptr;
+ for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
+ if (R->getName() == Name) {
+ Ctor = R->getCtor();
+ break;
+ }
+ }
+ assert(Ctor && "Unregistered pass name");
+ setDefault(Ctor);
+}
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+ Node->setNext(List);
+ List = Node;
+ if (Listener) Listener->NotifyAdd(Node->getName(),
+ Node->getCtor(),
+ Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+ for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+ if (*I == Node) {
+ if (Listener) Listener->NotifyRemove(Node->getName());
+ *I = (*I)->getNext();
+ break;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
new file mode 100644
index 000000000000..c3f6e9249e7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -0,0 +1,55 @@
+//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements simple dominator construction algorithms for finding
+// post dominators on machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+using namespace llvm;
+
+char MachinePostDominatorTree::ID = 0;
+
+// Declare initializeMachinePostDominatorTreePass.
+INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
+ "MachinePostDominator Tree Construction", true, true)
+
+MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
+  // The "true" argument requests a post-dominator tree.
+  DT = new DominatorTreeBase<MachineBasicBlock>(true);
+}
+
+FunctionPass *
+MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+ return new MachinePostDominatorTree();
+}
+
+bool
+MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+void
+MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void
+MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
+ DT->print(OS);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
new file mode 100644
index 000000000000..fc32183c7f63
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -0,0 +1,140 @@
+
+#include "llvm/CodeGen/MachineRegionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+
+#define DEBUG_TYPE "region"
+
+using namespace llvm;
+
+STATISTIC(numMachineRegions, "The # of machine regions");
+STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
+
+namespace llvm {
+template class RegionBase<RegionTraits<MachineFunction>>;
+template class RegionNodeBase<RegionTraits<MachineFunction>>;
+template class RegionInfoBase<RegionTraits<MachineFunction>>;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegion implementation
+//
+
+MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
+ MachineRegionInfo* RI,
+ MachineDominatorTree *DT, MachineRegion *Parent) :
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {
+
+}
+
+MachineRegion::~MachineRegion() { }
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfo implementation
+//
+
+MachineRegionInfo::MachineRegionInfo() :
+ RegionInfoBase<RegionTraits<MachineFunction>>() {
+
+}
+
+MachineRegionInfo::~MachineRegionInfo() {
+
+}
+
+void MachineRegionInfo::updateStatistics(MachineRegion *R) {
+ ++numMachineRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple())
+ ++numMachineSimpleRegions;
+}
+
+void MachineRegionInfo::recalculate(MachineFunction &F,
+ MachineDominatorTree *DT_,
+ MachinePostDominatorTree *PDT_,
+ MachineDominanceFrontier *DF_) {
+ DT = DT_;
+ PDT = PDT_;
+ DF = DF_;
+
+ MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F);
+
+ TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr);
+ updateStatistics(TopLevelRegion);
+ calculate(F);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfoPass implementation
+//
+
+MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
+ initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+MachineRegionInfoPass::~MachineRegionInfoPass() {
+
+}
+
+bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
+ releaseMemory();
+
+ auto DT = &getAnalysis<MachineDominatorTree>();
+ auto PDT = &getAnalysis<MachinePostDominatorTree>();
+ auto DF = &getAnalysis<MachineDominanceFrontier>();
+
+ RI.recalculate(F, DT, PDT, DF);
+ return false;
+}
+
+void MachineRegionInfoPass::releaseMemory() {
+ RI.releaseMemory();
+}
+
+void MachineRegionInfoPass::verifyAnalysis() const {
+  // Only do verification when the user wants to; otherwise this expensive
+  // check will be invoked by PMDataManager::verifyPreservedAnalysis when
+  // a region pass (marked PreservedAll) finishes.
+ if (MachineRegionInfo::VerifyRegionInfo)
+ RI.verifyAnalysis();
+}
+
+void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<DominanceFrontierWrapperPass>();
+}
+
+void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
+ RI.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
+ RI.dump();
+}
+#endif
+
+char MachineRegionInfoPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
+ "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file so that they can be used from
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// link time optimization.
+
+namespace llvm {
+ FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
+ }
+}
+
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 000000000000..613598dbe215
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,529 @@
+//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void MachineRegisterInfo::Delegate::anchor() {}
+
+MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
+ : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) {
+ unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
+ VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
+ UsedPhysRegMask.resize(NumRegs);
+ PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]());
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+ assert(RC && RC->isAllocatable() && "Invalid RC for virtual register");
+ VRegInfo[Reg].first = RC;
+}
+
+void MachineRegisterInfo::setRegBank(unsigned Reg,
+ const RegisterBank &RegBank) {
+ VRegInfo[Reg].first = &RegBank;
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC,
+ unsigned MinNumRegs) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC =
+ getTargetRegisterInfo()->getCommonSubClass(OldRC, RC);
+ if (!NewRC || NewRC == OldRC)
+ return NewRC;
+ if (NewRC->getNumRegs() < MinNumRegs)
+ return nullptr;
+ setRegClass(Reg, NewRC);
+ return NewRC;
+}
+
+bool
+MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ const TargetRegisterClass *NewRC =
+ getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF);
+
+ // Stop early if there is no room to grow.
+ if (NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (MachineOperand &MO : reg_nodbg_operands(Reg)) {
+ // Apply the effect of the given operand to NewRC.
+ MachineInstr *MI = MO.getParent();
+ unsigned OpNo = &MO - &MI->getOperand(0);
+ NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII,
+ getTargetRegisterInfo());
+ if (!NewRC || NewRC == OldRC)
+ return false;
+ }
+ setRegClass(Reg, NewRC);
+ return true;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+ assert(RegClass && "Cannot create register without RegClass!");
+ assert(RegClass->isAllocatable() &&
+ "Virtual register RegClass must be allocatable.");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ return Reg;
+}
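+
+// Illustrative usage sketch (editor's note, not part of the original source):
+// clients usually create a virtual register and immediately give it a
+// definition, e.g. a COPY from an incoming physical register (TargetRC and
+// PhysReg are placeholders):
+//
+//   unsigned VReg = MRI.createVirtualRegister(TargetRC);
+//   BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(TargetOpcode::COPY), VReg)
+//       .addReg(PhysReg);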
+
+unsigned
+MachineRegisterInfo::getSize(unsigned VReg) const {
+ VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg);
+ return SizeIt != getVRegToSize().end() ? SizeIt->second : 0;
+}
+
+void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) {
+ getVRegToSize()[VReg] = Size;
+}
+
+unsigned
+MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) {
+ assert(Size && "Cannot create empty virtual register");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ // FIXME: Should we use a dummy register class?
+ VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr);
+ getVRegToSize()[Reg] = Size;
+ RegAllocHints.grow(Reg);
+ if (TheDelegate)
+ TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+ return Reg;
+}
+
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (!VRegInfo[Reg].second)
+ continue;
+ verifyUseList(Reg);
+ llvm_unreachable("Remaining virtual register operands");
+ }
+#endif
+ VRegInfo.clear();
+ for (auto &I : LiveIns)
+ I.second = 0;
+}
+
+void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
+#ifndef NDEBUG
+ bool Valid = true;
+ for (MachineOperand &M : reg_operands(Reg)) {
+ MachineOperand *MO = &M;
+ MachineInstr *MI = MO->getParent();
+ if (!MI) {
+ errs() << PrintReg(Reg, getTargetRegisterInfo())
+ << " use list MachineOperand " << MO
+ << " has no parent instruction.\n";
+ Valid = false;
+ continue;
+ }
+ MachineOperand *MO0 = &MI->getOperand(0);
+ unsigned NumOps = MI->getNumOperands();
+ if (!(MO >= MO0 && MO < MO0+NumOps)) {
+ errs() << PrintReg(Reg, getTargetRegisterInfo())
+ << " use list MachineOperand " << MO
+ << " doesn't belong to parent MI: " << *MI;
+ Valid = false;
+ }
+ if (!MO->isReg()) {
+ errs() << PrintReg(Reg, getTargetRegisterInfo())
+ << " MachineOperand " << MO << ": " << *MO
+ << " is not a register\n";
+ Valid = false;
+ }
+ if (MO->getReg() != Reg) {
+ errs() << PrintReg(Reg, getTargetRegisterInfo())
+ << " use-list MachineOperand " << MO << ": "
+ << *MO << " is the wrong register\n";
+ Valid = false;
+ }
+ }
+ assert(Valid && "Invalid use list");
+#endif
+}
+
+void MachineRegisterInfo::verifyUseLists() const {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+ for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i)
+ verifyUseList(i);
+#endif
+}
+
+/// Add MO to the linked list of operands for its register.
+void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
+ assert(!MO->isOnRegUseList() && "Already on list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+
+ // Head points to the first list element.
+ // Next is NULL on the last list element.
+ // Prev pointers are circular, so Head->Prev == Last.
+
+ // Head is NULL for an empty list.
+ if (!Head) {
+ MO->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Next = nullptr;
+ HeadRef = MO;
+ return;
+ }
+ assert(MO->getReg() == Head->getReg() && "Different regs on the same list!");
+
+ // Insert MO between Last and Head in the circular Prev chain.
+ MachineOperand *Last = Head->Contents.Reg.Prev;
+ assert(Last && "Inconsistent use list");
+ assert(MO->getReg() == Last->getReg() && "Different regs on the same list!");
+ Head->Contents.Reg.Prev = MO;
+ MO->Contents.Reg.Prev = Last;
+
+ // Def operands always precede uses. This allows def_iterator to stop early.
+ // Insert def operands at the front, and use operands at the back.
+ if (MO->isDef()) {
+ // Insert def at the front.
+ MO->Contents.Reg.Next = Head;
+ HeadRef = MO;
+ } else {
+ // Insert use at the end.
+ MO->Contents.Reg.Next = nullptr;
+ Last->Contents.Reg.Next = MO;
+ }
+}
+
+/// Remove MO from its use-def list.
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
+ assert(MO->isOnRegUseList() && "Operand not on use list");
+ MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg());
+ MachineOperand *const Head = HeadRef;
+ assert(Head && "List already empty");
+
+ // Unlink this from the doubly linked list of operands.
+ MachineOperand *Next = MO->Contents.Reg.Next;
+ MachineOperand *Prev = MO->Contents.Reg.Prev;
+
+ // Prev links are circular, next link is NULL instead of looping back to Head.
+ if (MO == Head)
+ HeadRef = Next;
+ else
+ Prev->Contents.Reg.Next = Next;
+
+ (Next ? Next : Head)->Contents.Reg.Prev = Prev;
+
+ MO->Contents.Reg.Prev = nullptr;
+ MO->Contents.Reg.Next = nullptr;
+}
+
+/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
+///
+/// The Dst range is assumed to be uninitialized memory. (Or it may contain
+/// operands that won't be destroyed, which is OK because the MO destructor is
+/// trivial anyway).
+///
+/// The Src and Dst ranges may overlap.
+void MachineRegisterInfo::moveOperands(MachineOperand *Dst,
+ MachineOperand *Src,
+ unsigned NumOps) {
+ assert(Src != Dst && NumOps && "Noop moveOperands");
+
+ // Copy backwards if Dst is within the Src range.
+ int Stride = 1;
+ if (Dst >= Src && Dst < Src + NumOps) {
+ Stride = -1;
+ Dst += NumOps - 1;
+ Src += NumOps - 1;
+ }
+
+ // Copy one operand at a time.
+ do {
+ new (Dst) MachineOperand(*Src);
+
+ // Dst takes Src's place in the use-def chain.
+ if (Src->isReg()) {
+ MachineOperand *&Head = getRegUseDefListHead(Src->getReg());
+ MachineOperand *Prev = Src->Contents.Reg.Prev;
+ MachineOperand *Next = Src->Contents.Reg.Next;
+ assert(Head && "List empty, but operand is chained");
+ assert(Prev && "Operand was not on use-def list");
+
+ // Prev links are circular, next link is NULL instead of looping back to
+ // Head.
+ if (Src == Head)
+ Head = Dst;
+ else
+ Prev->Contents.Reg.Next = Dst;
+
+ // Update Prev pointer. This also works when Src was pointing to itself
+ // in a 1-element list. In that case Head == Dst.
+ (Next ? Next : Head)->Contents.Reg.Prev = Dst;
+ }
+
+ Dst += Stride;
+ Src += Stride;
+ } while (--NumOps);
+}
+
+/// replaceRegWith - Replace all instances of FromReg with ToReg in the
+/// machine function. This is like the IR-level X->replaceAllUsesWith(Y),
+/// except that it also changes any definitions of the register.
+/// If ToReg is a physical register, each operand's subregister index is
+/// applied to it to obtain the final/proper physical register.
+void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
+ assert(FromReg != ToReg && "Cannot replace a reg with itself");
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+
+ // TODO: This could be more efficient by bulk changing the operands.
+ for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
+ MachineOperand &O = *I;
+ ++I;
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ O.substPhysReg(ToReg, *TRI);
+ } else {
+ O.setReg(ToReg);
+ }
+ }
+}
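+
+// Illustrative usage sketch (editor's note, not part of the original source):
+// a pass that merges two virtual registers typically rewrites one into the
+// other and then drops the now-unreliable kill flags on the survivor:
+//
+//   MRI.replaceRegWith(SrcReg, DstReg);
+//   MRI.clearKillFlags(DstReg);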
+
+/// getVRegDef - Return the machine instr that defines the specified virtual
+/// register or null if none is found. This assumes that the code is in SSA
+/// form, so there should only be one definition.
+MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
+ // Since we are in SSA form, we can use the first definition.
+ def_instr_iterator I = def_instr_begin(Reg);
+ assert((I.atEnd() || std::next(I) == def_instr_end()) &&
+ "getVRegDef assumes a single definition or no definition");
+ return !I.atEnd() ? &*I : nullptr;
+}
+
+/// getUniqueVRegDef - Return the unique machine instr that defines the
+/// specified virtual register or null if none is found. If there are
+/// multiple definitions or no definition, return null.
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+ if (def_empty(Reg)) return nullptr;
+ def_instr_iterator I = def_instr_begin(Reg);
+ if (std::next(I) != def_instr_end())
+ return nullptr;
+ return &*I;
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+ use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+ if (UI == use_nodbg_end())
+ return false;
+ return ++UI == use_nodbg_end();
+}
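+
+// Illustrative usage sketch (editor's note, not part of the original source):
+// these queries are commonly combined in peephole-style checks, e.g. only
+// fold the defining instruction when the value has a single non-debug user
+// (tryToFold is a hypothetical helper):
+//
+//   if (MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg))
+//     if (MRI.hasOneNonDBGUse(Reg))
+//       tryToFold(*DefMI);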
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+ for (MachineOperand &MO : use_operands(Reg))
+ MO.setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == Reg || I->second == Reg)
+ return true;
+ return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->second == VReg)
+ return I->first;
+ return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == PReg)
+ return I->second;
+ return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
+}
+
+LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
+ // Lane masks are only defined for vregs.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const TargetRegisterClass &TRC = *getRegClass(Reg);
+ return TRC.getLaneMask();
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+ for (MachineInstr &I : use_instructions(Reg))
+ I.dump();
+}
+#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF);
+ assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() &&
+ "Invalid ReservedRegs vector from target");
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+ // Check if any overlapping register is modified, or is allocatable and may
+ // therefore be used later.
+ for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true);
+ AI.isValid(); ++AI)
+ if (!def_empty(*AI) || isAllocatable(*AI))
+ return false;
+ return true;
+}
+
+/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
+/// specified register as undefined, which causes the DBG_VALUE to be
+/// deleted during LiveDebugVariables analysis.
+void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const {
+ // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.)
+ MachineRegisterInfo::use_instr_iterator nextI;
+ for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end();
+ I != E; I = nextI) {
+ nextI = std::next(I); // I is invalidated by the setReg
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isDebugValue())
+ UseMI->getOperand(0).setReg(0U);
+ }
+}
+
+static const Function *getCalledFunction(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isGlobal())
+ continue;
+ const Function *Func = dyn_cast<Function>(MO.getGlobal());
+ if (Func != nullptr)
+ return Func;
+ }
+ return nullptr;
+}
+
+static bool isNoReturnDef(const MachineOperand &MO) {
+ // Anything which is not a noreturn function is a real def.
+ const MachineInstr &MI = *MO.getParent();
+ if (!MI.isCall())
+ return false;
+ const MachineBasicBlock &MBB = *MI.getParent();
+ if (!MBB.succ_empty())
+ return false;
+ const MachineFunction &MF = *MBB.getParent();
+ // We need to keep correct unwind information even if the function will
+ // not return, since the runtime may need it.
+ if (MF.getFunction()->hasFnAttribute(Attribute::UWTable))
+ return false;
+ const Function *Called = getCalledFunction(MI);
+ return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) ||
+ !Called->hasFnAttribute(Attribute::NoUnwind));
+}
+
+bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg,
+ bool SkipNoReturnDef) const {
+ if (UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) {
+ for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) {
+ if (!SkipNoReturnDef && isNoReturnDef(MO))
+ continue;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
+ if (UsedPhysRegMask.test(PhysReg))
+ return true;
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid();
+ ++AliasReg) {
+ if (!reg_nodbg_empty(*AliasReg))
+ return true;
+ }
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 000000000000..47ad60c5dd56
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,355 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It is based on the
+// SSAUpdater class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-ssaupdater"
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(nullptr), InsertedPHIs(NewPHI) {
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete static_cast<AvailableValsTy*>(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. V is the prototype virtual register; new PHI nodes created by the
+/// updater take its register class.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (!AV)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->begin();
+ if (!I->isPHI())
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->isPHI()) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which defines
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstrBuilder InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!HasValueForBlock(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // If all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->isPHI()) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
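+
+// Illustrative usage sketch (editor's note, not part of the original source):
+// a typical client (e.g. a tail-duplication style transform) drives the
+// updater roughly as follows; OldReg, NewReg1/2 and DefBB1/2 are placeholders.
+// Uses are collected first because RewriteUse() changes the operand's
+// register and would otherwise invalidate the use iterator.
+//
+//   MachineSSAUpdater SSAUpdate(MF);
+//   SSAUpdate.Initialize(OldReg);
+//   SSAUpdate.AddAvailableValue(DefBB1, NewReg1);
+//   SSAUpdate.AddAvailableValue(DefBB2, NewReg2);
+//
+//   SmallVector<MachineOperand *, 8> Uses;
+//   for (MachineOperand &MO : MRI->use_operands(OldReg))
+//     Uses.push_back(&MO);
+//   for (MachineOperand *MO : Uses)
+//     SSAUpdate.RewriteUse(*MO);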
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ typedef MachineBasicBlock BlkT;
+ typedef unsigned ValT;
+ typedef MachineInstr PhiT;
+
+ typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ /// Iterator for PHI operands.
+ class PHI_iterator {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHI_iterator(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ PHI_iterator(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ PHI_iterator &operator++() { idx += 2; return *this; }
+ bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
+ bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
+
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
+
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
+ }
+
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ MachineBasicBlock *Pred) {
+ MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred);
+ }
+
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return nullptr;
+ }
+
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+ }
+
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return nullptr;
+ }
+
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static unsigned GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
+ }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (unsigned V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..d921e2977cc7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,3553 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+namespace llvm {
+cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+cl::opt<bool>
+DumpCriticalPathLength("misched-dcpl", cl::Hidden,
+ cl::desc("Print critical path length to stdout"));
+}
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+/// In some situations a few uninteresting nodes depend on nearly all other
+/// nodes in the graph; provide a cutoff to hide them.
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
+ cl::desc("Hide nodes with more predecessor/successor than cutoff"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+
+static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
+ cl::desc("Only schedule this function"));
+static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
+ cl::desc("Only schedule this MBB#"));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+/// Avoid quadratic complexity in unusually large basic blocks by limiting the
+/// size of the ready lists.
+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
+ cl::desc("Limit ready list to N instructions"), cl::init(256));
+
+static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
+ cl::desc("Enable register pressure scheduling."), cl::init(true));
+
+static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
+ cl::desc("Enable cyclic critical path analysis."), cl::init(true));
+
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+ cl::desc("Enable memop clustering."),
+ cl::init(true));
+
+// Experimental heuristics
+static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
+
+// Pin the vtables to this file.
+void MachineSchedStrategy::anchor() {}
+void ScheduleDAGMutation::anchor() {}
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+MachineSchedContext::MachineSchedContext():
+ MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
+ RegClassInfo = new RegisterClassInfo();
+}
+
+MachineSchedContext::~MachineSchedContext() {
+ delete RegClassInfo;
+}
+
+namespace {
+/// Base class for a machine scheduler class that can run at any point.
+class MachineSchedulerBase : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
+
+ void print(raw_ostream &O, const Module* = nullptr) const override;
+
+protected:
+ void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
+};
+
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedulerBase {
+public:
+ MachineScheduler();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ static char ID; // Class identification, replacement for typeinfo
+
+protected:
+ ScheduleDAGInstrs *createMachineScheduler();
+};
+
+/// PostMachineScheduler runs shortly before code emission.
+class PostMachineScheduler : public MachineSchedulerBase {
+public:
+ PostMachineScheduler();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ static char ID; // Class identification, replacement for typeinfo
+
+protected:
+ ScheduleDAGInstrs *createPostMachineScheduler();
+};
+} // namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler()
+: MachineSchedulerBase(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+char PostMachineScheduler::ID = 0;
+
+char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
+
+INITIALIZE_PASS(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
+
+PostMachineScheduler::PostMachineScheduler()
+: MachineSchedulerBase(ID) {
+ initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory, used to indicate whether the scheduler
+/// has been overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return nullptr;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
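+
+// Illustrative sketch (editor's note, not part of the original source): a
+// target or plugin can expose its own scheduler through the same registry,
+// making it selectable with -misched=<name>. createMyCustomSched is a
+// placeholder factory with the ScheduleDAGCtor signature.
+//
+//   static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
+//     return createGenericSchedLive(C);  // or construct a custom scheduler
+//   }
+//   static MachineSchedRegistry
+//   MySchedRegistry("my-sched", "Run my custom scheduler.",
+//                   createMyCustomSched);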
+
+static cl::opt<bool> EnableMachineSched(
+ "enable-misched",
+ cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool> EnablePostRAMachineSched(
+ "enable-post-misched",
+ cl::desc("Enable the post-ra machine instruction scheduling pass."),
+ cl::init(true), cl::Hidden);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
+static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
+
+/// Decrement this iterator until reaching the top or a non-debug instr.
+static MachineBasicBlock::const_iterator
+priorNonDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator Beg) {
+ assert(I != Beg && "reached the top of the region, cannot decrement");
+ while (--I != Beg) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// Non-const version.
+static MachineBasicBlock::iterator
+priorNonDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator Beg) {
+ return const_cast<MachineInstr*>(
+ &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
+}
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::const_iterator
+nextIfDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// Non-const version.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator End) {
+ // Cast the return value to nonconst MachineInstr, then cast to an
+ // instr_iterator, which does not check for null, and finally return a
+ // bundle_iterator.
+ return MachineBasicBlock::instr_iterator(
+ const_cast<MachineInstr*>(
+ &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+}
+
+/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
+ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor != useDefaultMachineSched)
+ return Ctor(this);
+
+ // Get the default scheduler set by the target for this function.
+ ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
+ if (Scheduler)
+ return Scheduler;
+
+ // Default to GenericScheduler.
+ return createGenericSchedLive(this);
+}
+
+/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
+/// the caller. We don't have a command line option to override the postRA
+/// scheduler. The Target must configure it.
+ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
+ // Get the postRA scheduler set by the target for this function.
+ ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
+ if (Scheduler)
+ return Scheduler;
+
+ // Default to GenericScheduler.
+ return createGenericSchedPostRA(this);
+}
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
+ if (EnableMachineSched.getNumOccurrences()) {
+ if (!EnableMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enableMachineScheduler())
+ return false;
+
+ DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
+
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ if (VerifyScheduling) {
+ DEBUG(LIS->dump());
+ MF->verify(this, "Before machine scheduling.");
+ }
+ RegClassInfo->runOnMachineFunction(*MF);
+
+ // Instantiate the selected scheduler for this target, function, and
+ // optimization level.
+ std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+ scheduleRegions(*Scheduler, false);
+
+ DEBUG(LIS->dump());
+ if (VerifyScheduling)
+ MF->verify(this, "After machine scheduling.");
+ return true;
+}
+
+bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
+ if (EnablePostRAMachineSched.getNumOccurrences()) {
+ if (!EnablePostRAMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enablePostRAScheduler()) {
+ DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+ return false;
+ }
+ DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
+
+ // Initialize the context of the pass.
+ MF = &mf;
+ PassConfig = &getAnalysis<TargetPassConfig>();
+
+ if (VerifyScheduling)
+ MF->verify(this, "Before post machine scheduling.");
+
+ // Instantiate the selected scheduler for this target, function, and
+ // optimization level.
+ std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
+ scheduleRegions(*Scheduler, true);
+
+ if (VerifyScheduling)
+ MF->verify(this, "After post machine scheduling.");
+ return true;
+}
+
+/// Return true if the given instruction should not be included in a scheduling
+/// region.
+///
+/// MachineScheduler does not currently support scheduling across calls. To
+/// handle calls, the DAG builder needs to be modified to create register
+/// anti/output dependencies on the registers clobbered by the call's regmask
+/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
+/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
+/// the boundary, but there would be no benefit to postRA scheduling across
+/// calls this late anyway.
+static bool isSchedBoundary(MachineBasicBlock::iterator MI,
+ MachineBasicBlock *MBB,
+ MachineFunction *MF,
+ const TargetInstrInfo *TII) {
+ return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
+}
+
+/// Main driver for both MachineScheduler and PostMachineScheduler.
+void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
+ bool FixKillFlags) {
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // Visit all machine basic blocks.
+ //
+ // TODO: Visit blocks in global postorder or postorder within the bottom-up
+ // loop tree. Then we can optionally compute global RegPressure.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler.startBlock(&*MBB);
+
+#ifndef NDEBUG
+ if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
+ continue;
+ if (SchedOnlyBlock.getNumOccurrences()
+ && (int)SchedOnlyBlock != MBB->getNumber())
+ continue;
+#endif
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points to the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ //
+ // MBB::size() uses instr_iterator to count. Here we need a bundle to count
+ // as a single instruction.
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
+
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end() ||
+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
+ --RegionEnd;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ unsigned NumRegionInstrs = 0;
+ MachineBasicBlock::iterator I = RegionEnd;
+ for (;I != MBB->begin(); --I) {
+ if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
+ break;
+ if (!I->isDebugValue())
+ ++NumRegionInstrs;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == std::prev(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler.exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF->getName()
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << MF->getName();
+ errs() << ":BB# " << MBB->getNumber();
+ errs() << " " << MBB->getName() << " \n";
+ }
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler.schedule();
+
+ // Close the current region.
+ Scheduler.exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of its scheduled region.
+ RegionEnd = Scheduler.begin();
+ }
+ Scheduler.finishBlock();
+ // FIXME: Ideally, no further passes should rely on kill flags. However,
+ // thumb2 size reduction is currently an exception, so the PostMIScheduler
+ // needs to do this.
+ if (FixKillFlags)
+ Scheduler.fixupKills(&*MBB);
+ }
+ Scheduler.finalizeSchedule();
+}
+
+void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+LLVM_DUMP_METHOD
+void ReadyQueue::dump() {
+ dbgs() << "Queue " << Name << ": ";
+ for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+ dbgs() << Queue[i]->NodeNum << " ";
+ dbgs() << "\n";
+}
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Basic machine instruction scheduling. This is
+// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
+// virtual registers.
+//===----------------------------------------------------------------------===//
+
+// Provide a vtable anchor.
+ScheduleDAGMI::~ScheduleDAGMI() {
+}
+
+bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+ return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
+bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+ // Do not use WillCreateCycle, it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPred(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
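+
+// Illustrative sketch (editor's note, not part of the original source): DAG
+// mutations use this pair of helpers to add edges without creating a cycle,
+// e.g. to keep two memory operations adjacent via a cluster edge:
+//
+//   if (DAG->canAddEdge(SUb, SUa))
+//     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));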
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ if (SuccEdge->isCluster())
+ NextClusterSucc = SuccSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
+ SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
+
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+ if (PredEdge->isWeak()) {
+ --PredSU->WeakSuccsLeft;
+ if (PredEdge->isCluster())
+ NextClusterPred = PredSU;
+ return;
+ }
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
+ PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
+
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs)
+{
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+
+ SchedImpl->initPolicy(begin, end, regioninstrs);
+}
+
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
+void ScheduleDAGMI::moveInstruction(
+ MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
+ // Advance RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ ++RegionBegin;
+
+ // Update the instruction stream.
+ BB->splice(InsertPos, BB, MI);
+
+ // Update LiveIntervals
+ if (LIS)
+ LIS->handleMove(*MI, /*UpdateFlags=*/true);
+
+ // Recede RegionBegin if an instruction moves above the first.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// Per-region scheduling driver, called back from
+/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
+/// does not consider liveness or register pressure. It is useful for PostRA
+/// scheduling and potentially other custom schedulers.
+void ScheduleDAGMI::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
+
+ // Build the DAG.
+ buildSchedGraph(AA);
+
+ Topo.InitDAGTopologicalSorting();
+
+ postprocessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ MachineInstr *MI = SU->getInstr();
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else
+ moveInstruction(MI, CurrentTop);
+ } else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ }
+ // Notify the scheduling strategy before updating the DAG.
+ // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
+ // runs, it can then use the accurate ReadyCycle time to determine whether
+ // newly released nodes can move to the readyQ.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+ for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+ Mutations[i]->apply(this);
+ }
+}
+
+void ScheduleDAGMI::
+findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+ SmallVectorImpl<SUnit*> &BotRoots) {
+ for (std::vector<SUnit>::iterator
+ I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+ SUnit *SU = &(*I);
+ assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+ // Order predecessors so DFSResult follows the critical path.
+ SU->biasCriticalPath();
+
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (!I->NumPredsLeft)
+ TopRoots.push_back(SU);
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (!I->NumSuccsLeft)
+ BotRoots.push_back(SU);
+ }
+ ExitSU.biasCriticalPath();
+}
+
+/// Identify DAG roots and set up scheduler queues.
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ NextClusterSucc = nullptr;
+ NextClusterPred = nullptr;
+
+ // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+ //
+ // Nodes with unreleased weak edges can still be roots.
+ // Release top roots in forward order.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+ SchedImpl->releaseTopNode(*I);
+ }
+ // Release bottom roots in reverse order so the higher priority nodes appear
+ // first. This is more natural and slightly more efficient.
+ for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+ I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+ SchedImpl->releaseBottomNode(*I);
+ }
+
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ SchedImpl->registerRoots();
+
+ // Advance past initial DebugValues.
+ CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+ CurrentBottom = RegionEnd;
+}
+
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+ // Release dependent instructions for scheduling.
+ if (IsTopNode)
+ releaseSuccessors(SU);
+ else
+ releasePredecessors(SU);
+
+ SU->isScheduled = true;
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue) {
+ BB->splice(RegionBegin, BB, FirstDbgValue);
+ RegionBegin = FirstDbgValue;
+ }
+
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ if (&*RegionBegin == DbgValue)
+ ++RegionBegin;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ if (OrigPrevMI == std::prev(RegionEnd))
+ RegionEnd = DbgValue;
+ }
+ DbgValues.clear();
+ FirstDbgValue = nullptr;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+ for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+ if (SUnit *SU = getSUnit(&(*MI)))
+ SU->dump(this);
+ else
+ dbgs() << "Missing SUnit\n";
+ }
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGMILive::~ScheduleDAGMILive() {
+ delete DFSResult;
+}
+
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs)
+{
+ // ScheduleDAGMI initializes SchedImpl's per-region policy.
+ ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
+
+ // For convenience remember the end of the liveness region.
+ LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
+
+ SUPressureDiffs.clear();
+
+ ShouldTrackPressure = SchedImpl->shouldTrackPressure();
+ ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
+
+ assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
+ "ShouldTrackLaneMasks requires ShouldTrackPressure");
+}
+
+// Set up the register pressure trackers for the top and bottom scheduled
+// regions.
+void ScheduleDAGMILive::initRegPressure() {
+ TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
+ ShouldTrackLaneMasks, false);
+ BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, false);
+
+ // Close the RPTracker to finalize live ins.
+ RPTracker.closeRegion();
+
+ DEBUG(RPTracker.dump());
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+ // Close one end of the tracker so we can call
+ // getMaxUpward/DownwardPressureDelta before advancing across any
+ // instructions. This converts currently live regs into live ins/outs.
+ TopRPTracker.closeTop();
+ BotRPTracker.closeBottom();
+
+ BotRPTracker.initLiveThru(RPTracker);
+ if (!BotRPTracker.getLiveThru().empty()) {
+ TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
+ DEBUG(dbgs() << "Live Thru: ";
+ dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
+ }
+
+ // For each live out vreg reduce the pressure change associated with other
+ // uses of the same vreg below the live-out reaching def.
+ updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd) {
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(&LiveUses);
+ updatePressureDiffs(LiveUses);
+ }
+
+ DEBUG(
+ dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
+ assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+ // Cache the list of excess pressure sets in this region. This will also track
+ // the max pressure in the scheduled code for these sets.
+ RegionCriticalPSets.clear();
+ const std::vector<unsigned> &RegionPressure =
+ RPTracker.getPressure().MaxSetPressure;
+ for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
+ if (RegionPressure[i] > Limit) {
+ DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+ << " Limit " << Limit
+ << " Actual " << RegionPressure[i] << "\n");
+ RegionCriticalPSets.push_back(PressureChange(i));
+ }
+ }
+ DEBUG(dbgs() << "Excess PSets: ";
+ for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+ dbgs() << TRI->getRegPressureSetName(
+ RegionCriticalPSets[i].getPSet()) << " ";
+ dbgs() << "\n");
+}
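+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// the excess-set detection above reduces to flagging every pressure set whose
+// recorded maximum exceeds its target-provided limit. The helper below is
+// hypothetical and operates on plain vectors rather than LLVM types; it
+// assumes both vectors have one entry per pressure set.
+#include <vector>
+
+static std::vector<unsigned>
+sketchFindExcessPressureSets(const std::vector<unsigned> &MaxPressure,
+ const std::vector<unsigned> &Limits) {
+ std::vector<unsigned> Excess;
+ for (unsigned i = 0, e = (unsigned)MaxPressure.size(); i != e; ++i) {
+ // A set is "critical" for the region when its max pressure is above the
+ // limit, mirroring how RegionCriticalPSets is populated above.
+ if (MaxPressure[i] > Limits[i])
+ Excess.push_back(i);
+ }
+ return Excess;
+}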
+
+void ScheduleDAGMILive::
+updateScheduledPressure(const SUnit *SU,
+ const std::vector<unsigned> &NewMaxPressure) {
+ const PressureDiff &PDiff = getPressureDiff(SU);
+ unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
+ for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
+ I != E; ++I) {
+ if (!I->isValid())
+ break;
+ unsigned ID = I->getPSet();
+ while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
+ ++CritIdx;
+ if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
+ if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
+ && NewMaxPressure[ID] <= INT16_MAX)
+ RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
+ }
+ unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
+ if (NewMaxPressure[ID] >= Limit - 2) {
+ DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
+ << NewMaxPressure[ID]
+ << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
+ << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
+ }
+ }
+}
+
+/// Update the PressureDiff array for liveness after scheduling this
+/// instruction.
+void ScheduleDAGMILive::updatePressureDiffs(
+ ArrayRef<RegisterMaskPair> LiveUses) {
+ for (const RegisterMaskPair &P : LiveUses) {
+ unsigned Reg = P.RegUnit;
+ /// FIXME: Currently assuming single-use physregs.
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+
+ if (ShouldTrackLaneMasks) {
+ // If the register has just become live then other uses won't change
+ // this fact anymore => decrement pressure.
+ // If the register has just become dead then other uses make it come
+ // back to life => increment pressure.
+ bool Decrement = P.LaneMask != 0;
+
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit &SU = *V2SU.SU;
+ if (SU.isScheduled || &SU == &ExitSU)
+ continue;
+
+ PressureDiff &PDiff = getPressureDiff(&SU);
+ PDiff.addPressureChange(Reg, Decrement, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
+ << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+ << ' ' << *SU.getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
+ } else {
+ assert(P.LaneMask != 0);
+ DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+ // This may be called before CurrentBottom has been initialized. However,
+ // BotRPTracker must have a valid position. We want the value live into the
+ // instruction or live out of the block, so ask for the previous
+ // instruction's live-out.
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ VNInfo *VNI;
+ MachineBasicBlock::const_iterator I =
+ nextIfDebug(BotRPTracker.getPos(), BB->end());
+ if (I == BB->end())
+ VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ else {
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
+ VNI = LRQ.valueIn();
+ }
+ // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+ assert(VNI && "No live value at use.");
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ // If this use comes before the reaching def, it cannot be a last use,
+ // so decrease its pressure change.
+ if (!SU->isScheduled && SU != &ExitSU) {
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (LRQ.valueIn() == VNI) {
+ PressureDiff &PDiff = getPressureDiff(SU);
+ PDiff.addPressureChange(Reg, true, &MRI);
+ DEBUG(
+ dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
+ << *SU->getInstr();
+ dbgs() << " to ";
+ PDiff.dump(*TRI);
+ );
+ }
+ }
+ }
+ }
+ }
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
+///
+/// This is a skeletal driver, with all the functionality pushed into helpers,
+/// so that it can be easily extended by experimental schedulers. Generally,
+/// implementing MachineSchedStrategy should be sufficient to implement a new
+/// scheduling algorithm. However, if a scheduler further subclasses
+/// ScheduleDAGMILive then it will want to override this virtual method in order
+/// to update any specialized state.
+void ScheduleDAGMILive::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
+ buildDAGWithRegPressure();
+
+ Topo.InitDAGTopologicalSorting();
+
+ postprocessDAG();
+
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+
+ // Initialize the strategy before modifying the DAG.
+ // This may initialize a DFSResult to be used for queue priority.
+ SchedImpl->initialize(this);
+
+ DEBUG(
+ for (const SUnit &SU : SUnits) {
+ SU.dumpAll(this);
+ if (ShouldTrackPressure) {
+ dbgs() << " Pressure Diff : ";
+ getPressureDiff(&SU).dump(*TRI);
+ }
+ dbgs() << '\n';
+ }
+ );
+ if (ViewMISchedDAGs) viewGraph();
+
+ // Initialize ready queues now that the DAG and priority data are finalized.
+ initQueues(TopRoots, BotRoots);
+
+ bool IsTopNode = false;
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
+ assert(!SU->isScheduled && "Node already scheduled");
+ if (!checkSchedLimit())
+ break;
+
+ scheduleMI(SU, IsTopNode);
+
+ if (DFSResult) {
+ unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+ if (!ScheduledTrees.test(SubtreeID)) {
+ ScheduledTrees.set(SubtreeID);
+ DFSResult->scheduleTree(SubtreeID);
+ SchedImpl->scheduleTree(SubtreeID);
+ }
+ }
+
+ // Notify the scheduling strategy after updating the DAG.
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Build the DAG and set up three register pressure trackers.
+void ScheduleDAGMILive::buildDAGWithRegPressure() {
+ if (!ShouldTrackPressure) {
+ RPTracker.reset();
+ RegionCriticalPSets.clear();
+ buildSchedGraph(AA);
+ return;
+ }
+
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+}
+
+void ScheduleDAGMILive::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
+/// Compute the max cyclic critical path through the DAG. The scheduling DAG
+/// only provides the critical path for single block loops. To handle loops that
+/// span blocks, we could use the vreg path latencies provided by
+/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
+/// available for use in the scheduler.
+///
+/// The cyclic path estimation identifies a def-use pair that crosses the back
+/// edge and considers the depth and height of the nodes. For example, consider
+/// the following instruction sequence where each instruction has unit latency
+/// and defines an eponymous virtual register:
+///
+/// a->b(a,c)->c(b)->d(c)->exit
+///
+/// The cyclic critical path is two cycles: b->c->b
+/// The acyclic critical path is four cycles: a->b->c->d->exit
+/// LiveOutHeight = height(c) = len(c->d->exit) = 2
+/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
+/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
+/// LiveInDepth = depth(b) = len(a->b) = 1
+///
+/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
+/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
+/// CyclicCriticalPath = min(2, 2) = 2
+///
+/// This could be relevant to PostRA scheduling, but is currently implemented
+/// assuming LiveIntervals.
+unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
+ // This only applies to single-block loops.
+ if (!BB->isSuccessor(BB))
+ return 0;
+
+ unsigned MaxCyclicLatency = 0;
+ // Visit each live out vreg def to find def/use pairs that cross iterations.
+ for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
+ unsigned Reg = P.RegUnit;
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+ if (!DefVNI)
+ continue;
+
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
+ const SUnit *DefSU = getSUnit(DefMI);
+ if (!DefSU)
+ continue;
+
+ unsigned LiveOutHeight = DefSU->getHeight();
+ unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
+ // Visit all local users of the vreg def.
+ for (const VReg2SUnit &V2SU
+ : make_range(VRegUses.find(Reg), VRegUses.end())) {
+ SUnit *SU = V2SU.SU;
+ if (SU == &ExitSU)
+ continue;
+
+ // Only consider uses of the phi.
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+ if (!LRQ.valueIn()->isPHIDef())
+ continue;
+
+ // Assume that a path spanning two iterations is a cycle, which could
+ // overestimate in strange cases. This allows cyclic latency to be
+ // estimated as the minimum slack of the vreg's depth or height.
+ unsigned CyclicLatency = 0;
+ if (LiveOutDepth > SU->getDepth())
+ CyclicLatency = LiveOutDepth - SU->getDepth();
+
+ unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
+ if (LiveInHeight > LiveOutHeight) {
+ if (LiveInHeight - LiveOutHeight < CyclicLatency)
+ CyclicLatency = LiveInHeight - LiveOutHeight;
+ } else
+ CyclicLatency = 0;
+
+ DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
+ << SU->NodeNum << ") = " << CyclicLatency << "c\n");
+ if (CyclicLatency > MaxCyclicLatency)
+ MaxCyclicLatency = CyclicLatency;
+ }
+ }
+ DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
+ return MaxCyclicLatency;
+}
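+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// the per-use cyclic latency computed above is the minimum of the depth slack
+// and the height slack, with each slack clamped at zero. The helper below is
+// hypothetical and reproduces the worked example from the comment
+// (a->b(a,c)->c(b)->d(c)->exit), where both slacks evaluate to 2.
+static unsigned sketchCyclicLatency(unsigned LiveOutDepth, unsigned UseDepth,
+ unsigned LiveInHeight, unsigned LiveOutHeight) {
+ unsigned DepthSlack = LiveOutDepth > UseDepth ? LiveOutDepth - UseDepth : 0;
+ unsigned HeightSlack =
+ LiveInHeight > LiveOutHeight ? LiveInHeight - LiveOutHeight : 0;
+ // A path spanning two iterations is treated as a cycle, so the estimate is
+ // the smaller of the two slacks (zero if either direction has none).
+ return DepthSlack < HeightSlack ? DepthSlack : HeightSlack;
+}
+// For the documented example, sketchCyclicLatency(3, 1, 4, 2) returns 2.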
+
+/// Release ExitSU predecessors and set up scheduler queues. Re-position
+/// the Top RP tracker in case the region beginning has changed.
+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
+ ArrayRef<SUnit*> BotRoots) {
+ ScheduleDAGMI::initQueues(TopRoots, BotRoots);
+ if (ShouldTrackPressure) {
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+ }
+}
+
+/// Move an instruction and update register pressure.
+void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+ else {
+ moveInstruction(MI, CurrentTop);
+ TopRPTracker.setPos(MI);
+ }
+
+ if (ShouldTrackPressure) {
+ // Update top scheduled pressure.
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ TopRPTracker.advance(RegOpers);
+ assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+ DEBUG(
+ dbgs() << "Top Pressure:\n";
+ dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
+ updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
+ }
+ } else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ MachineBasicBlock::iterator priorII =
+ priorNonDebug(CurrentBottom, CurrentTop);
+ if (&*priorII == MI)
+ CurrentBottom = priorII;
+ else {
+ if (&*CurrentTop == MI) {
+ CurrentTop = nextIfDebug(++CurrentTop, priorII);
+ TopRPTracker.setPos(CurrentTop);
+ }
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ if (ShouldTrackPressure) {
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+
+ BotRPTracker.recedeSkipDebugValues();
+ SmallVector<RegisterMaskPair, 8> LiveUses;
+ BotRPTracker.recede(RegOpers, &LiveUses);
+ assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+ DEBUG(
+ dbgs() << "Bottom Pressure:\n";
+ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI);
+ );
+
+ updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
+ updatePressureDiffs(LiveUses);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between neighboring
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+ struct MemOpInfo {
+ SUnit *SU;
+ unsigned BaseReg;
+ int64_t Offset;
+ MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
+ : SU(su), BaseReg(reg), Offset(ofs) {}
+
+ bool operator<(const MemOpInfo&RHS) const {
+ return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
+ }
+ };
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ bool IsLoad;
+
+public:
+ BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, bool IsLoad)
+ : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+protected:
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+ StoreClusterMutation(const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+ : BaseMemOpClusterMutation(tii, tri, true) {}
+};
+} // anonymous
+
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+ ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
+ SUnit *SU = MemOps[Idx];
+ unsigned BaseReg;
+ int64_t Offset;
+ if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
+ }
+ if (MemOpRecords.size() < 2)
+ return;
+
+ std::sort(MemOpRecords.begin(), MemOpRecords.end());
+ unsigned ClusterLength = 1;
+ for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+ if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
+ ClusterLength = 1;
+ continue;
+ }
+
+ SUnit *SUa = MemOpRecords[Idx].SU;
+ SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+ ClusterLength) &&
+ DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+ DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
+ << SUb->NodeNum << ")\n");
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since nearby
+ // loads should have effectively the same inputs.
+ for (SUnit::const_succ_iterator
+ SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
+ if (SI->getSUnit() == SUb)
+ continue;
+ DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
+ DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ ++ClusterLength;
+ } else
+ ClusterLength = 1;
+ }
+}
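+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// the clustering above relies on sorting (BaseReg, Offset) records so that
+// memory operations off the same base register become adjacent, in offset
+// order. A hypothetical, self-contained version of that ordering:
+#include <algorithm>
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+namespace memop_sketch {
+struct MemRecord {
+ unsigned BaseReg;
+ int64_t Offset;
+ bool operator<(const MemRecord &RHS) const {
+ // Lexicographic order: group by base register first, then by offset,
+ // exactly the property the clustering loop above depends on.
+ return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
+ }
+};
+inline void sortMemRecords(std::vector<MemRecord> &Records) {
+ std::sort(Records.begin(), Records.end());
+}
+} // namespace memop_sketch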
+
+/// \brief Callback from DAG postProcessing to create cluster edges for loads
+/// or stores.
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ // Map DAG NodeNum to store chain ID.
+ DenseMap<unsigned, unsigned> StoreChainIDs;
+ // Map each store chain to a set of dependent MemOps.
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if ((IsLoad && !SU->getInstr()->mayLoad()) ||
+ (!IsLoad && !SU->getInstr()->mayStore()))
+ continue;
+
+ unsigned ChainPredID = DAG->SUnits.size();
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->isCtrl()) {
+ ChainPredID = PI->getSUnit()->NodeNum;
+ break;
+ }
+ }
+ // Check if this chain-like pred has been seen
+ // before. ChainPredID==MaxNodeID at the top of the schedule.
+ unsigned NumChains = StoreChainDependents.size();
+ std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
+ StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
+ if (Result.second)
+ StoreChainDependents.resize(NumChains + 1);
+ StoreChainDependents[Result.first->second].push_back(SU);
+ }
+
+ // Iterate over the store chains.
+ for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
+ clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class MacroFusion : public ScheduleDAGMutation {
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+public:
+ MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
+ : TII(TII), TRI(TRI) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+} // anonymous
+
+/// Returns true if \p MI reads a register written by \p Other.
+static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
+ const MachineInstr &Other) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Other.modifiesRegister(Reg, &TRI))
+ return true;
+ }
+ return false;
+}
+
+/// \brief Callback from DAG postProcessing to create cluster edges to encourage
+/// fused operations.
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ // For now, assume targets can only fuse with the branch.
+ SUnit &ExitSU = DAG->ExitSU;
+ MachineInstr *Branch = ExitSU.getInstr();
+ if (!Branch)
+ return;
+
+ for (SUnit &SU : DAG->SUnits) {
+ // SUnits with successors can't be scheduled in front of the ExitSU.
+ if (!SU.Succs.empty())
+ continue;
+ // We only care if the node writes to a register that the branch reads.
+ MachineInstr *Pred = SU.getInstr();
+ if (!HasDataDep(TRI, *Branch, *Pred))
+ continue;
+
+ if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
+ break;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+ // Transient state.
+ SlotIndex RegionBeginIdx;
+ // RegionEndIdx is the slot index of the last non-debug instruction in the
+ // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+ SlotIndex RegionEndIdx;
+public:
+ CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+
+protected:
+ void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
+};
+} // anonymous
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+/// I0: = dst
+/// I1: src = ...
+/// I2: = dst
+/// I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+/// I0: dst = src (copy)
+/// I1: = dst
+/// I2: src = ...
+/// I3: = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
+ LiveIntervals *LIS = DAG->getLIS();
+ MachineInstr *Copy = CopySU->getInstr();
+
+ // Check for pure vreg copies.
+ const MachineOperand &SrcOp = Copy->getOperand(1);
+ unsigned SrcReg = SrcOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
+ return;
+
+ const MachineOperand &DstOp = Copy->getOperand(0);
+ unsigned DstReg = DstOp.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
+ return;
+
+ // Check if either the dest or source is local. If it's live across a back
+ // edge, it's not local. Note that if both vregs are live across the back
+ // edge, we cannot successfully constrain the copy without cyclic scheduling.
+ // If both the copy's source and dest are local live intervals, then we
+ // should treat the dest as the global for the purpose of adding
+ // constraints. This adds edges from source's other uses to the copy.
+ unsigned LocalReg = SrcReg;
+ unsigned GlobalReg = DstReg;
+ LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+ LocalReg = DstReg;
+ GlobalReg = SrcReg;
+ LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+ return;
+ }
+ LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+ // Find the global segment after the start of the local LI.
+ LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+ // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+ // local live range. We could create edges from other global uses to the local
+ // start, but the coalescer should have already eliminated these cases, so
+ // don't bother dealing with it.
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // If GlobalSegment is killed at the LocalLI->start, the call to find()
+ // returned the next global segment. But if GlobalSegment overlaps with
+ // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+ // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+ if (GlobalSegment->contains(LocalLI->beginIndex()))
+ ++GlobalSegment;
+
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+ if (GlobalSegment != GlobalLI->begin()) {
+ // Two address defs have no hole.
+ if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
+ GlobalSegment->start)) {
+ return;
+ }
+ // If the prior global segment may be defined by the same two-address
+ // instruction that also defines LocalLI, then we can't make a hole here.
+ if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
+ LocalLI->beginIndex())) {
+ return;
+ }
+ // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+ // it would be a disconnected component in the live range.
+ assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
+ "Disconnected LRG within the scheduling region.");
+ }
+ MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+ if (!GlobalDef)
+ return;
+
+ SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+ if (!GlobalSU)
+ return;
+
+ // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+ // constraining the uses of the last local def to precede GlobalDef.
+ SmallVector<SUnit*,8> LocalUses;
+ const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+ MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+ SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+ for (SUnit::const_succ_iterator
+ I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+ continue;
+ if (I->getSUnit() == GlobalSU)
+ continue;
+ if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+ return;
+ LocalUses.push_back(I->getSUnit());
+ }
+ // Open the top of the GlobalLI hole by constraining any earlier global uses
+ // to precede the start of LocalLI.
+ SmallVector<SUnit*,8> GlobalUses;
+ MachineInstr *FirstLocalDef =
+ LIS->getInstructionFromIndex(LocalLI->beginIndex());
+ SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+ for (SUnit::const_pred_iterator
+ I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
+ if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+ continue;
+ if (I->getSUnit() == FirstLocalSU)
+ continue;
+ if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+ return;
+ GlobalUses.push_back(I->getSUnit());
+ }
+ DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+ // Add the weak edges.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
+ DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
+ << GlobalSU->NodeNum << ")\n");
+ DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+ }
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
+ DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
+ << FirstLocalSU->NodeNum << ")\n");
+ DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+ }
+}
+
+/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+ assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
+
+ MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+ if (FirstPos == DAG->end())
+ return;
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
+ RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+ *priorNonDebug(DAG->end(), DAG->begin()));
+
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->isCopy())
+ continue;
+
+ constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
+// and possibly other custom schedulers.
+//===----------------------------------------------------------------------===//
+
+static const unsigned InvalidCycle = ~0U;
+
+SchedBoundary::~SchedBoundary() { delete HazardRec; }
+
+void SchedBoundary::reset() {
+ // A new HazardRec is created for each DAG and owned by SchedBoundary.
+ // Destroying and reconstructing it is very expensive though. So keep
+ // invalid, placeholder HazardRecs.
+ if (HazardRec && HazardRec->isEnabled()) {
+ delete HazardRec;
+ HazardRec = nullptr;
+ }
+ Available.clear();
+ Pending.clear();
+ CheckPending = false;
+ NextSUs.clear();
+ CurrCycle = 0;
+ CurrMOps = 0;
+ MinReadyCycle = UINT_MAX;
+ ExpectedLatency = 0;
+ DependentLatency = 0;
+ RetiredMOps = 0;
+ MaxExecutedResCount = 0;
+ ZoneCritResIdx = 0;
+ IsResourceLimited = false;
+ ReservedCycles.clear();
+#ifndef NDEBUG
+ // Track the maximum number of stall cycles that could arise either from the
+ // latency of a DAG edge or the number of cycles that a processor resource is
+ // reserved (SchedBoundary::ReservedCycles).
+ MaxObservedStall = 0;
+#endif
+ // Reserve a zero-count for invalid CritResIdx.
+ ExecutedResCounts.resize(1);
+ assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
+}
+
+void SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+ reset();
+ if (!SchedModel->hasInstrSchedModel())
+ return;
+ RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
+ for (std::vector<SUnit>::iterator
+ I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+ RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+ * SchedModel->getMicroOpFactor();
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ }
+ }
+}
+
+void SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+ reset();
+ DAG = dag;
+ SchedModel = smodel;
+ Rem = rem;
+ if (SchedModel->hasInstrSchedModel()) {
+ ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+ ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+ }
+}
+
+/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
+/// these "soft stalls" differently than the hard stall cycles based on CPU
+/// resources and computed by checkHazard(). A fully in-order model
+/// (MicroOpBufferSize==0) will not make use of this since instructions are not
+/// available for scheduling until they are ready. However, a weaker in-order
+/// model may use this for heuristics. For example, if a processor has in-order
+/// behavior when reading certain resources, this may come into play.
+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
+ if (!SU->isUnbuffered)
+ return 0;
+
+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+ if (ReadyCycle > CurrCycle)
+ return ReadyCycle - CurrCycle;
+ return 0;
+}
+
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+ unsigned NextUnreserved = ReservedCycles[PIdx];
+ // If this resource has never been used, always return cycle zero.
+ if (NextUnreserved == InvalidCycle)
+ return 0;
+ // For bottom-up scheduling add the cycles needed for the current operation.
+ if (!isTop())
+ NextUnreserved += Cycles;
+ return NextUnreserved;
+}
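+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// a hypothetical reservation-table lookup with the same shape as
+// getNextResourceCycle. An entry that was never used answers "cycle zero";
+// for bottom-up scheduling the current operation's own cycles are added.
+static unsigned sketchNextResourceCycle(unsigned ReservedUntil,
+ unsigned InvalidMark, bool TopDown,
+ unsigned OpCycles) {
+ if (ReservedUntil == InvalidMark)
+ return 0; // Resource not yet used in this zone.
+ return TopDown ? ReservedUntil : ReservedUntil + OpCycles;
+}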
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool SchedBoundary::checkHazard(SUnit *SU) {
+ if (HazardRec->isEnabled()
+ && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
+ return true;
+ }
+ unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+ if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
+ << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+ return true;
+ }
+ if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+ if (NRCycle > CurrCycle) {
+#ifndef NDEBUG
+ MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+#endif
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
+ << SchedModel->getResourceName(PI->ProcResourceIdx)
+ << "=" << NRCycle << "c\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
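+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// the counter-based part of the hazard check above amounts to asking whether
+// the candidate's micro-ops still fit into the current issue group. Names
+// below are hypothetical.
+static bool sketchExceedsIssueWidth(unsigned CurrMOps, unsigned CandMOps,
+ unsigned IssueWidth) {
+ // A fresh group (CurrMOps == 0) always accepts the instruction; otherwise
+ // the group must not be pushed past the machine's issue width.
+ return CurrMOps > 0 && CurrMOps + CandMOps > IssueWidth;
+}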
+
+// Find the unscheduled node in ReadySUs with the highest latency.
+unsigned SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+ SUnit *LateSU = nullptr;
+ unsigned RemLatency = 0;
+ for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
+ I != E; ++I) {
+ unsigned L = getUnscheduledLatency(*I);
+ if (L > RemLatency) {
+ RemLatency = L;
+ LateSU = *I;
+ }
+ }
+ if (LateSU) {
+ DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+ << LateSU->NodeNum << ") " << RemLatency << "c\n");
+ }
+ return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instructions. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
+unsigned SchedBoundary::
+getOtherResourceCount(unsigned &OtherCritIdx) {
+ OtherCritIdx = 0;
+ if (!SchedModel->hasInstrSchedModel())
+ return 0;
+
+ unsigned OtherCritCount = Rem->RemIssueCount
+ + (RetiredMOps * SchedModel->getMicroOpFactor());
+ DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
+ << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+ for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
+ PIdx != PEnd; ++PIdx) {
+ unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
+ if (OtherCount > OtherCritCount) {
+ OtherCritCount = OtherCount;
+ OtherCritIdx = PIdx;
+ }
+ }
+ if (OtherCritIdx) {
+ DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: "
+ << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+ << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
+ }
+ return OtherCritCount;
+}
+
+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+#ifndef NDEBUG
+ // ReadyCycle has been bumped up to CurrCycle when this node was
+ // scheduled, but CurrCycle may have been eagerly advanced immediately after
+ // scheduling, so may now be greater than ReadyCycle.
+ if (ReadyCycle > CurrCycle)
+ MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+#endif
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ // Check for interlocks first. For the purpose of other heuristics, an
+ // instruction that cannot issue appears as if it's not in the ReadyQueue.
+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
+ if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
+ Available.size() >= ReadyListLimit)
+ Pending.push(SU);
+ else
+ Available.push(SU);
+
+ // Record this node as an immediate dependent of the scheduled node.
+ NextSUs.insert(SU);
+}
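+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// the Pending/Available split above can be read as a single predicate. The
+// helper below is hypothetical.
+static bool sketchDeferToPending(bool IsBuffered, unsigned ReadyCycle,
+ unsigned CurrCycle, bool HasHazard,
+ unsigned AvailableSize, unsigned Limit) {
+ // An unbuffered (strictly in-order) zone may not expose a node before its
+ // ready cycle; a hazard or a full Available list also defers it.
+ return (!IsBuffered && ReadyCycle > CurrCycle) || HasHazard ||
+ AvailableSize >= Limit;
+}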
+
+void SchedBoundary::releaseTopNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ releaseNode(SU, SU->TopReadyCycle);
+}
+
+void SchedBoundary::releaseBottomNode(SUnit *SU) {
+ if (SU->isScheduled)
+ return;
+
+ releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void SchedBoundary::bumpCycle(unsigned NextCycle) {
+ if (SchedModel->getMicroOpBufferSize() == 0) {
+ assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ if (MinReadyCycle > NextCycle)
+ NextCycle = MinReadyCycle;
+ }
+ // Update the current micro-ops, which will issue in the next cycle.
+ unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
+ CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
+
+ // Decrement DependentLatency based on the next cycle.
+ if ((NextCycle - CurrCycle) > DependentLatency)
+ DependentLatency = 0;
+ else
+ DependentLatency -= (NextCycle - CurrCycle);
+
+ if (!HazardRec->isEnabled()) {
+ // Bypass HazardRec virtual calls.
+ CurrCycle = NextCycle;
+ } else {
+ // Bypass getHazardType calls in case of long latency.
+ for (; CurrCycle != NextCycle; ++CurrCycle) {
+ if (isTop())
+ HazardRec->AdvanceCycle();
+ else
+ HazardRec->RecedeCycle();
+ }
+ }
+ CheckPending = true;
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ IsResourceLimited =
+ (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+ > (int)LFactor;
+
+ DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
+}
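+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// the IsResourceLimited update above compares the critical resource count
+// against the scheduled latency once both are expressed in the same scaled
+// units. A hypothetical standalone form of that test:
+static bool sketchIsResourceLimited(unsigned CriticalCount,
+ unsigned ScheduledLatency,
+ unsigned LatencyFactor) {
+ // The zone is resource limited when the critical resource runs ahead of the
+ // latency-derived count by more than one full cycle's worth of units.
+ return (int)(CriticalCount - ScheduledLatency * LatencyFactor) >
+ (int)LatencyFactor;
+}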
+
+void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
+ ExecutedResCounts[PIdx] += Count;
+ if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
+ MaxExecutedResCount = ExecutedResCounts[PIdx];
+}
+
+/// Add the given processor resource to this scheduled zone.
+///
+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
+/// during which this resource is consumed.
+///
+/// \return the next cycle at which the instruction may execute without
+/// oversubscribing resources.
+unsigned SchedBoundary::
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
+ unsigned Factor = SchedModel->getResourceFactor(PIdx);
+ unsigned Count = Factor * Cycles;
+ DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx)
+ << " +" << Cycles << "x" << Factor << "u\n");
+
+ // Update Executed resources counts.
+ incExecutedResources(PIdx, Count);
+ assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+ Rem->RemainingCounts[PIdx] -= Count;
+
+ // Check if this resource exceeds the current critical resource. If so, it
+ // becomes the critical resource.
+ if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
+ ZoneCritResIdx = PIdx;
+ DEBUG(dbgs() << " *** Critical resource "
+ << SchedModel->getResourceName(PIdx) << ": "
+ << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
+ }
+ // For reserved resources, record the highest cycle using the resource.
+ unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+ if (NextAvailable > CurrCycle) {
+ DEBUG(dbgs() << " Resource conflict: "
+ << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+ << NextAvailable << "\n");
+ }
+ return NextAvailable;
+}
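+
+// Illustrative sketch (editorial addition, not part of the upstream sources):
+// resource usage is accumulated in scaled "factor" units so that resources of
+// different widths can be compared directly. Hypothetical helper:
+static unsigned sketchScaledResourceUnits(unsigned Factor, unsigned Cycles) {
+ // For example, a resource with factor 2 held for 3 cycles contributes 6
+ // units, matching the Count = Factor * Cycles computation above.
+ return Factor * Cycles;
+}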
+
+/// Move the boundary of scheduled code by one SUnit.
+void SchedBoundary::bumpNode(SUnit *SU) {
+ // Update the reservation table.
+ if (HazardRec->isEnabled()) {
+ if (!isTop() && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+ HazardRec->EmitInstruction(SU);
+ }
+ // checkHazard should prevent scheduling multiple instructions per cycle that
+ // exceed the issue width.
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
+ assert(
+ (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
+ "Cannot schedule this instruction's MicroOps in the current cycle.");
+
+ unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+ DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
+
+ unsigned NextCycle = CurrCycle;
+ switch (SchedModel->getMicroOpBufferSize()) {
+ case 0:
+ assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+ break;
+ case 1:
+ if (ReadyCycle > NextCycle) {
+ NextCycle = ReadyCycle;
+ DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
+ }
+ break;
+ default:
+ // We don't currently model the OOO reorder buffer, so consider all
+ // scheduled MOps to be "retired". We do loosely model in-order resource
+ // latency. If this instruction uses an in-order resource, account for any
+ // likely stall cycles.
+ if (SU->isUnbuffered && ReadyCycle > NextCycle)
+ NextCycle = ReadyCycle;
+ break;
+ }
+ RetiredMOps += IncMOps;
+
+ // Update resource counts and critical resource.
+ if (SchedModel->hasInstrSchedModel()) {
+ unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
+ assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
+ Rem->RemIssueCount -= DecRemIssue;
+ if (ZoneCritResIdx) {
+ // Scale scheduled micro-ops for comparing with the critical resource.
+ unsigned ScaledMOps =
+ RetiredMOps * SchedModel->getMicroOpFactor();
+
+ // If scaled micro-ops are now more than the previous critical resource by
+ // a full cycle, then micro-ops issue becomes critical.
+ if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
+ >= (int)SchedModel->getLatencyFactor()) {
+ ZoneCritResIdx = 0;
+ DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
+ << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
+ }
+ }
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned RCycle =
+ countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
+ if (RCycle > NextCycle)
+ NextCycle = RCycle;
+ }
+ if (SU->hasReservedResource) {
+ // For reserved resources, record the highest cycle using the resource.
+ // For top-down scheduling, this is the cycle in which we schedule this
+ // instruction plus the number of cycles the operation reserves the
+ // resource. For bottom-up it is simply the instruction's cycle.
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ unsigned PIdx = PI->ProcResourceIdx;
+ if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+ if (isTop()) {
+ ReservedCycles[PIdx] =
+ std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+ }
+ else
+ ReservedCycles[PIdx] = NextCycle;
+ }
+ }
+ }
+ }
+ // Update ExpectedLatency and DependentLatency.
+ unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
+ unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
+ if (SU->getDepth() > TopLatency) {
+ TopLatency = SU->getDepth();
+ DEBUG(dbgs() << " " << Available.getName()
+ << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
+ }
+ if (SU->getHeight() > BotLatency) {
+ BotLatency = SU->getHeight();
+ DEBUG(dbgs() << " " << Available.getName()
+ << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
+ }
+ // If we stall for any reason, bump the cycle.
+ if (NextCycle > CurrCycle) {
+ bumpCycle(NextCycle);
+ } else {
+ // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+ // resource limited. If a stall occurred, bumpCycle does this.
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ IsResourceLimited =
+ (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+ > (int)LFactor;
+ }
+ // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+ // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+ // one cycle. Since we commonly reach the max MOps here, opportunistically
+ // bump the cycle to avoid uselessly checking everything in the readyQ.
+ CurrMOps += IncMOps;
+ while (CurrMOps >= SchedModel->getIssueWidth()) {
+ DEBUG(dbgs() << " *** Max MOps " << CurrMOps
+ << " at cycle " << CurrCycle << '\n');
+ bumpCycle(++NextCycle);
+ }
+ DEBUG(dumpScheduledState());
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void SchedBoundary::releasePending() {
+ // If the available queue is empty, it is safe to reset MinReadyCycle.
+ if (Available.empty())
+ MinReadyCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
+ for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+ SUnit *SU = *(Pending.begin()+i);
+ unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+ if (ReadyCycle < MinReadyCycle)
+ MinReadyCycle = ReadyCycle;
+
+ if (!IsBuffered && ReadyCycle > CurrCycle)
+ continue;
+
+ if (checkHazard(SU))
+ continue;
+
+ if (Available.size() >= ReadyListLimit)
+ break;
+
+ Available.push(SU);
+ Pending.remove(Pending.begin()+i);
+ --i; --e;
+ }
+ CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void SchedBoundary::removeReady(SUnit *SU) {
+ if (Available.isInQueue(SU))
+ Available.remove(Available.find(SU));
+ else {
+ assert(Pending.isInQueue(SU) && "bad ready count");
+ Pending.remove(Pending.find(SU));
+ }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
+SUnit *SchedBoundary::pickOnlyChoice() {
+ if (CheckPending)
+ releasePending();
+
+ if (CurrMOps > 0) {
+ // Defer any ready instrs that now have a hazard.
+ for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+ if (checkHazard(*I)) {
+ Pending.push(*I);
+ I = Available.remove(I);
+ continue;
+ }
+ ++I;
+ }
+ }
+ for (unsigned i = 0; Available.empty(); ++i) {
+// FIXME: Re-enable assert once PR20057 is resolved.
+// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
+// "permanent hazard");
+ (void)i;
+ bumpCycle(CurrCycle + 1);
+ releasePending();
+ }
+
+ DEBUG(Pending.dump());
+ DEBUG(Available.dump());
+
+ if (Available.size() == 1)
+ return *Available.begin();
+ return nullptr;
+}
+
+#ifndef NDEBUG
+// This is useful information to dump after bumpNode.
+// Note that the Queue contents are more useful before pickNodeFromQueue.
+void SchedBoundary::dumpScheduledState() {
+ unsigned ResFactor;
+ unsigned ResCount;
+ if (ZoneCritResIdx) {
+ ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
+ ResCount = getResourceCount(ZoneCritResIdx);
+ } else {
+ ResFactor = SchedModel->getMicroOpFactor();
+ ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
+ }
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
+ << " Retired: " << RetiredMOps;
+ dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
+ dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
+ << ResCount / ResFactor << " "
+ << SchedModel->getResourceName(ZoneCritResIdx)
+ << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
+ << (IsResourceLimited ? " - Resource" : " - Latency")
+ << " limited.\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// GenericScheduler - Generic implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void GenericSchedulerBase::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+ const TargetSchedModel *SchedModel) {
+ if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+ return;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+ ResDelta.CritResources += PI->Cycles;
+ if (PI->ProcResourceIdx == Policy.DemandResIdx)
+ ResDelta.DemandedResources += PI->Cycles;
+ }
+}
+
+/// Set the CandPolicy for a scheduling zone given the current resources and
+/// latencies inside and outside the zone.
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
+ SchedBoundary &CurrZone,
+ SchedBoundary *OtherZone) {
+ // Apply preemptive heuristics based on the total latency and resources
+ // inside and outside this zone. Potential stalls should be considered before
+ // following this policy.
+
+ // Compute remaining latency. We need this both to determine whether the
+ // overall schedule has become latency-limited and whether the instructions
+ // outside this zone are resource or latency limited.
+ //
+ // The "dependent" latency is updated incrementally during scheduling as the
+ // max height/depth of scheduled nodes minus the cycles since it was
+ // scheduled:
+ // DLat = max(N.depth - (CurrCycle - N.ReadyCycle) for N in Zone)
+ //
+ // The "independent" latency is the max ready queue depth:
+ // ILat = max(N.depth for N in Available|Pending)
+ //
+ // RemainingLatency is the greater of independent and dependent latency.
+ unsigned RemLatency = CurrZone.getDependentLatency();
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Available.elements()));
+ RemLatency = std::max(RemLatency,
+ CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+
+ // Compute the critical resource outside the zone.
+ unsigned OtherCritIdx = 0;
+ unsigned OtherCount =
+ OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
+
+ bool OtherResLimited = false;
+ if (SchedModel->hasInstrSchedModel()) {
+ unsigned LFactor = SchedModel->getLatencyFactor();
+ OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
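+    // Illustrative numbers: with LFactor = 2, OtherCount = 30 and
+    // RemLatency = 12, 30 - 24 = 6 > 2, so the other zone is treated as
+    // resource limited.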
+ }
+ // Schedule aggressively for latency in PostRA mode. We don't check for
+ // acyclic latency during PostRA, and highly out-of-order processors will
+ // skip PostRA scheduling.
+ if (!OtherResLimited) {
+ if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
+ Policy.ReduceLatency |= true;
+ DEBUG(dbgs() << " " << CurrZone.Available.getName()
+ << " RemainingLatency " << RemLatency << " + "
+ << CurrZone.getCurrCycle() << "c > CritPath "
+ << Rem.CriticalPath << "\n");
+ }
+ }
+ // If the same resource is limiting inside and outside the zone, do nothing.
+ if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
+ return;
+
+ DEBUG(
+ if (CurrZone.isResourceLimited()) {
+ dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
+ << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
+ << "\n";
+ }
+ if (OtherResLimited)
+ dbgs() << " RemainingLimit: "
+ << SchedModel->getResourceName(OtherCritIdx) << "\n";
+ if (!CurrZone.isResourceLimited() && !OtherResLimited)
+ dbgs() << " Latency limited both directions.\n");
+
+ if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
+ Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
+
+ if (OtherResLimited)
+ Policy.DemandResIdx = OtherCritIdx;
+}
+
+#ifndef NDEBUG
+const char *GenericSchedulerBase::getReasonStr(
+ GenericSchedulerBase::CandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND ";
+ case Only1: return "ONLY1 ";
+ case PhysRegCopy: return "PREG-COPY ";
+ case RegExcess: return "REG-EXCESS";
+ case RegCritical: return "REG-CRIT ";
+ case Stall: return "STALL ";
+ case Cluster: return "CLUSTER ";
+ case Weak: return "WEAK ";
+ case RegMax: return "REG-MAX ";
+ case ResourceReduce: return "RES-REDUCE";
+ case ResourceDemand: return "RES-DEMAND";
+ case TopDepthReduce: return "TOP-DEPTH ";
+ case TopPathReduce: return "TOP-PATH ";
+ case BotHeightReduce:return "BOT-HEIGHT";
+ case BotPathReduce: return "BOT-PATH ";
+ case NextDefUse: return "DEF-USE ";
+ case NodeOrder: return "ORDER ";
+  }
+ llvm_unreachable("Unknown reason!");
+}
+
+void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
+ PressureChange P;
+ unsigned ResIdx = 0;
+ unsigned Latency = 0;
+ switch (Cand.Reason) {
+ default:
+ break;
+ case RegExcess:
+ P = Cand.RPDelta.Excess;
+ break;
+ case RegCritical:
+ P = Cand.RPDelta.CriticalMax;
+ break;
+ case RegMax:
+ P = Cand.RPDelta.CurrentMax;
+ break;
+ case ResourceReduce:
+ ResIdx = Cand.Policy.ReduceResIdx;
+ break;
+ case ResourceDemand:
+ ResIdx = Cand.Policy.DemandResIdx;
+ break;
+ case TopDepthReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ case TopPathReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotHeightReduce:
+ Latency = Cand.SU->getHeight();
+ break;
+ case BotPathReduce:
+ Latency = Cand.SU->getDepth();
+ break;
+ }
+ dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ if (P.isValid())
+ dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
+ << ":" << P.getUnitInc() << " ";
+ else
+ dbgs() << " ";
+ if (ResIdx)
+ dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
+ else
+ dbgs() << " ";
+ if (Latency)
+ dbgs() << " " << Latency << " cycles ";
+ else
+ dbgs() << " ";
+ dbgs() << '\n';
+}
+#endif
+
+/// Return true if this heuristic determines order.
+static bool tryLess(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ return false;
+}
+
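+/// Return true if a latency-oriented preference determines the order. For a
+/// top-down zone, prefer the shallower node once the current best candidate's
+/// depth exceeds the zone's scheduled latency, and otherwise prefer the node
+/// with the greater remaining height; the bottom-up case is symmetric with
+/// depth and height swapped.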
+static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ SchedBoundary &Zone) {
+ if (Zone.isTop()) {
+ if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericSchedulerBase::TopPathReduce))
+ return true;
+ } else {
+ if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+ if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+ TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
+ return true;
+ }
+ if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+ TryCand, Cand, GenericSchedulerBase::BotPathReduce))
+ return true;
+ }
+ return false;
+}
+
+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
+ << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
+ tracePick(Cand.Reason, Cand.AtTop);
+}
+
+void GenericScheduler::initialize(ScheduleDAGMI *dag) {
+ assert(dag->hasVRegLiveness() &&
+ "(PreRA)GenericScheduler needs vreg liveness");
+ DAG = static_cast<ScheduleDAGMILive*>(dag);
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ Bot.init(DAG, SchedModel, &Rem);
+
+ // Initialize resource counts.
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+ // are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+ if (!Bot.HazardRec) {
+ Bot.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+ TopCand.SU = nullptr;
+ BotCand.SU = nullptr;
+}
+
+/// Initialize the per-region scheduling policy.
+void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ const MachineFunction &MF = *Begin->getParent()->getParent();
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+
+ // Avoid setting up the register pressure tracker for small regions to save
+ // compile time. As a rough heuristic, only track pressure when the number of
+ // schedulable instructions exceeds half the integer register file.
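+  // Illustrative example: with 32 allocatable integer registers, pressure is
+  // only tracked for regions of more than 16 instructions.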
+ RegionPolicy.ShouldTrackPressure = true;
+ for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
+ MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
+ if (TLI->isTypeLegal(LegalIntVT)) {
+ unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
+ TLI->getRegClassFor(LegalIntVT));
+ RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+ }
+ }
+
+ // For generic targets, we default to bottom-up, because it's simpler and more
+ // compile-time optimizations have been implemented in that direction.
+ RegionPolicy.OnlyBottomUp = true;
+
+ // Allow the subtarget to override default policy.
+ MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
+
+ // After subtarget overrides, apply command line options.
+ if (!EnableRegPressure)
+ RegionPolicy.ShouldTrackPressure = false;
+
+ // Check -misched-topdown/bottomup can force or unforce scheduling direction.
+ // e.g. -misched-bottomup=false allows scheduling in both directions.
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ if (ForceBottomUp.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyBottomUp = ForceBottomUp;
+ if (RegionPolicy.OnlyBottomUp)
+ RegionPolicy.OnlyTopDown = false;
+ }
+ if (ForceTopDown.getNumOccurrences() > 0) {
+ RegionPolicy.OnlyTopDown = ForceTopDown;
+ if (RegionPolicy.OnlyTopDown)
+ RegionPolicy.OnlyBottomUp = false;
+ }
+}
+
+void GenericScheduler::dumpPolicy() {
+ dbgs() << "GenericScheduler RegionPolicy: "
+ << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
+ << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
+ << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
+ << "\n";
+}
+
+/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
+/// critical path by more cycles than it takes to drain the instruction buffer.
+/// We estimate an upper bound on in-flight instructions as:
+///
+/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
+/// InFlightIterations = AcyclicPath / CyclesPerIteration
+/// InFlightResources = InFlightIterations * LoopResources
+///
+/// TODO: Check execution resources in addition to IssueCount.
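+///
+/// Worked example with assumed numbers: CyclicPath = 10c, Loop-Resource-Height
+/// = 8c and AcyclicPath = 40c give CyclesPerIteration = 10 and
+/// InFlightIterations = 4; at 10 micro-ops per iteration that is 40 in-flight
+/// micro-ops, which exceeds a 32-entry micro-op buffer, so the loop is marked
+/// acyclic-latency limited.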
+void GenericScheduler::checkAcyclicLatency() {
+ if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
+ return;
+
+ // Scaled number of cycles per loop iteration.
+ unsigned IterCount =
+ std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
+ Rem.RemIssueCount);
+ // Scaled acyclic critical path.
+ unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
+ // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
+ unsigned InFlightCount =
+ (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
+ unsigned BufferLimit =
+ SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
+
+ Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
+
+ DEBUG(dbgs() << "IssueCycles="
+ << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
+ << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
+ << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
+ << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
+ << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
+ if (Rem.IsAcyclicLatencyLimited)
+ dbgs() << " ACYCLIC LATENCY LIMIT\n");
+}
+
+void GenericScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them just in case.
+ for (std::vector<SUnit*>::const_iterator
+ I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
+ }
+
+ if (EnableCyclicPath) {
+ Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
+ checkAcyclicLatency();
+ }
+}
+
+static bool tryPressure(const PressureChange &TryP,
+ const PressureChange &CandP,
+ GenericSchedulerBase::SchedCandidate &TryCand,
+ GenericSchedulerBase::SchedCandidate &Cand,
+ GenericSchedulerBase::CandReason Reason,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) {
+ // If one candidate decreases and the other increases, go with it.
+ // Invalid candidates have UnitInc==0.
+ if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+ Reason)) {
+ return true;
+ }
+ // Do not compare the magnitude of pressure changes between top and bottom
+ // boundary.
+ if (Cand.AtTop != TryCand.AtTop)
+ return false;
+
+ // If both candidates affect the same set in the same boundary, go with the
+ // smallest increase.
+ unsigned TryPSet = TryP.getPSetOrMax();
+ unsigned CandPSet = CandP.getPSetOrMax();
+ if (TryPSet == CandPSet) {
+ return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+ Reason);
+ }
+
+ int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
+ std::numeric_limits<int>::max();
+
+ int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
+ std::numeric_limits<int>::max();
+
+ // If the candidates are decreasing pressure, reverse priority.
+ if (TryP.getUnitInc() < 0)
+ std::swap(TryRank, CandRank);
+ return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
+}
+
+static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
+ return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
+}
+
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
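+///
+/// Illustrative example (bottom-up zone): for "%EAX = COPY %vreg5" whose
+/// physreg result feeds an already scheduled instruction below, the copy is
+/// biased (+1) so it is picked immediately and placed adjacent to that use.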
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+ const MachineInstr *MI = SU->getInstr();
+ if (!MI->isCopy())
+ return 0;
+
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+  // If we have already scheduled the physreg producer/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ return 0;
+}
+
+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop,
+ const RegPressureTracker &RPTracker,
+ RegPressureTracker &TempTracker) {
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
+ if (DAG->isTrackingPressure()) {
+ if (AtTop) {
+ TempTracker.getMaxDownwardPressureDelta(
+ Cand.SU->getInstr(),
+ Cand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ } else {
+ if (VerifyScheduling) {
+ TempTracker.getMaxUpwardPressureDelta(
+ Cand.SU->getInstr(),
+ &DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ } else {
+ RPTracker.getUpwardPressureDelta(
+ Cand.SU->getInstr(),
+ DAG->getPressureDiff(Cand.SU),
+ Cand.RPDelta,
+ DAG->getRegionCriticalPSets(),
+ DAG->getRegPressure().MaxSetPressure);
+ }
+ }
+ }
+ DEBUG(if (Cand.RPDelta.Excess.isValid())
+ dbgs() << " Try SU(" << Cand.SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
+ << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
+}
+
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+///   if Cand is from a different zone than TryCand.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+ biasPhysRegCopy(Cand.SU, Cand.AtTop),
+ TryCand, Cand, PhysRegCopy))
+ return;
+
+ // Avoid exceeding the target's limit.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
+ Cand.RPDelta.Excess,
+ TryCand, Cand, RegExcess, TRI,
+ DAG->MF))
+ return;
+
+ // Avoid increasing the max critical pressure in the scheduled region.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
+ Cand.RPDelta.CriticalMax,
+ TryCand, Cand, RegCritical, TRI,
+ DAG->MF))
+ return;
+
+  // We only compare a subset of features when comparing nodes between the
+  // Top and Bottom boundaries. Some properties are simply incomparable; in
+  // many other instances we should only override the other boundary if
+  // something is a clearly good pick on one boundary. Skip heuristics that
+  // are more "tie-breaking" in nature.
+ bool SameBoundary = Zone != nullptr;
+ if (SameBoundary) {
+ // For loops that are acyclic path limited, aggressively schedule for
+ // latency. This can result in very long dependence chains scheduled in
+ // sequence, so once every cycle (when CurrMOps == 0), switch to normal
+ // heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+ tryLatency(TryCand, Cand, *Zone))
+ return;
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+ Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+ }
+
+ // Keep clustered nodes together to encourage downstream peephole
+ // optimizations which may reduce resource requirements.
+ //
+ // This is a best effort to set things up for a post-RA pass. Optimizations
+ // like generating loads of multiple registers should ideally be done within
+ // the scheduler pass by combining the loads during DAG postprocessing.
+ const SUnit *CandNextClusterSU =
+ Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ const SUnit *TryCandNextClusterSU =
+ TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+ if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+ Cand.SU == CandNextClusterSU,
+ TryCand, Cand, Cluster))
+ return;
+
+ if (SameBoundary) {
+ // Weak edges are for clustering and other constraints.
+ if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+ getWeakLeft(Cand.SU, Cand.AtTop),
+ TryCand, Cand, Weak))
+ return;
+ }
+
+ // Avoid increasing the max pressure of the entire region.
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
+ Cand.RPDelta.CurrentMax,
+ TryCand, Cand, RegMax, TRI,
+ DAG->MF))
+ return;
+
+ if (SameBoundary) {
+ // Avoid critical resource consumption and balance the schedule.
+ TryCand.initResourceDelta(DAG, SchedModel);
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ // For acyclic path limited loops, latency was already checked above.
+ if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+ return;
+
+ // Prefer immediate defs/users of the last scheduled instruction. This is a
+ // local pressure avoidance strategy that also makes the machine code
+ // readable.
+ if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
+ TryCand, Cand, NextDefUse))
+ return;
+
+ // Fall through to original instruction order.
+ if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+ TryCand.Reason = NodeOrder;
+ }
+ }
+}
+
+/// Pick the best candidate from the queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ // getMaxPressureDelta temporarily modifies the tracker.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ ReadyQueue &Q = Zone.Available;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ tryCandidate(Cand, TryCand, ZoneArg);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(DAG, SchedModel);
+ Cand.setBest(TryCand);
+ DEBUG(traceCandidate(Cand));
+ }
+ }
+}
+
+/// Pick the best candidate node from either the top or bottom queue.
+SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ tracePick(Only1, false);
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ tracePick(Only1, true);
+ return SU;
+ }
+ // Set the bottom-up policy based on the state of the current bottom zone and
+ // the instructions outside the zone, including the top zone.
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+ assert(TCand.SU == BotCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
+
+ // Check if the top Q has a better candidate.
+ DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+ if (VerifyScheduling) {
+ SchedCandidate TCand;
+ TCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+ assert(TCand.SU == TopCand.SU &&
+ "Last pick result should correspond to re-picking right now");
+ }
+#endif
+ }
+
+ // Pick best from BotCand and TopCand.
+ assert(BotCand.isValid());
+ assert(TopCand.isValid());
+ SchedCandidate Cand = BotCand;
+ TopCand.Reason = NoCand;
+ tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
+ DEBUG(traceCandidate(Cand));
+ }
+
+ IsTopNode = Cand.AtTop;
+ tracePick(Cand);
+ return Cand.SU;
+}
+
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ do {
+ if (RegionPolicy.OnlyTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(TopCand);
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (RegionPolicy.OnlyBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(BotCand);
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ return SU;
+}
+
+void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+ MachineBasicBlock::iterator InsertPos = SU->getInstr();
+ if (!isTop)
+ ++InsertPos;
+ SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+ // Find already scheduled copies with a single physreg dependence and move
+ // them just above the scheduled instruction.
+ for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ continue;
+ SUnit *DepSU = I->getSUnit();
+ if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+ continue;
+ MachineInstr *Copy = DepSU->getInstr();
+ if (!Copy->isCopy())
+ continue;
+ DEBUG(dbgs() << " Rescheduling physreg copy ";
+ I->getSUnit()->dump(DAG));
+ DAG->moveInstruction(Copy, InsertPos);
+ }
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
+void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ if (IsTopNode) {
+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+ Top.bumpNode(SU);
+ if (SU->hasPhysRegUses)
+ reschedulePhysRegCopies(SU, true);
+ } else {
+ SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
+ Bot.bumpNode(SU);
+ if (SU->hasPhysRegDefs)
+ reschedulePhysRegCopies(SU, false);
+ }
+}
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+ ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
+ // Register DAG post-processors.
+ //
+ // FIXME: extend the mutation API to allow earlier mutations to instantiate
+ // data and pass it to later mutations. Have a single mutation that gathers
+ // the interesting nodes in one pass.
+ DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
+ if (EnableMemOpCluster) {
+ if (DAG->TII->enableClusterLoads())
+ DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+ if (DAG->TII->enableClusterStores())
+ DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
+ }
+ if (EnableMacroFusion)
+ DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+ return DAG;
+}
+
+static MachineSchedRegistry
+GenericSchedRegistry("converge", "Standard converging scheduler.",
+ createGenericSchedLive);
+
+//===----------------------------------------------------------------------===//
+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+ DAG = Dag;
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
+
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ BotRoots.clear();
+
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+ // or are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+ Itin, DAG);
+ }
+}
+
+
+void PostGenericScheduler::registerRoots() {
+ Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them just in case.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
+ if ((*I)->getDepth() > Rem.CriticalPath)
+ Rem.CriticalPath = (*I)->getDepth();
+ }
+ DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+ if (DumpCriticalPathLength) {
+ errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
+ }
+}
+
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand) {
+
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ // Prioritize instructions that read unbuffered resources by stall cycles.
+ if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+ Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+ return;
+
+ // Avoid critical resource consumption and balance the schedule.
+ if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+ TryCand, Cand, ResourceReduce))
+ return;
+ if (tryGreater(TryCand.ResDelta.DemandedResources,
+ Cand.ResDelta.DemandedResources,
+ TryCand, Cand, ResourceDemand))
+ return;
+
+ // Avoid serializing long latency dependence chains.
+ if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+ return;
+ }
+
+ // Fall through to original instruction order.
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+ TryCand.Reason = NodeOrder;
+}
+
+void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
+ ReadyQueue &Q = Top.Available;
+ for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+ SchedCandidate TryCand(Cand.Policy);
+ TryCand.SU = *I;
+ TryCand.AtTop = true;
+ TryCand.initResourceDelta(DAG, SchedModel);
+ tryCandidate(Cand, TryCand);
+ if (TryCand.Reason != NoCand) {
+ Cand.setBest(TryCand);
+ DEBUG(traceCandidate(Cand));
+ }
+ }
+}
+
+/// Pick the next node to schedule.
+SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ do {
+ SU = Top.pickOnlyChoice();
+ if (SU) {
+ tracePick(Only1, true);
+ } else {
+ CandPolicy NoPolicy;
+ SchedCandidate TopCand(NoPolicy);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
+ pickNodeFromQueue(TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ tracePick(TopCand);
+ SU = TopCand.SU;
+ }
+ } while (SU->isScheduled);
+
+ IsTopNode = true;
+ Top.removeReady(SU);
+
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ return SU;
+}
+
+/// Called after ScheduleDAGMI has scheduled an instruction and updated
+/// scheduled/remaining flags in the DAG nodes.
+void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+ SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+ Top.bumpNode(SU);
+}
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
+}
+
+//===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+ const SchedDFSResult *DFSResult;
+ const BitVector *ScheduledTrees;
+ bool MaximizeILP;
+
+ ILPOrder(bool MaxILP)
+ : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
+
+ /// \brief Apply a less-than relation on node priority.
+ ///
+ /// (Return true if A comes after B in the Q.)
+ bool operator()(const SUnit *A, const SUnit *B) const {
+ unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+ unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+ if (SchedTreeA != SchedTreeB) {
+ // Unscheduled trees have lower priority.
+ if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+ return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have lower priority.
+ if (DFSResult->getSubtreeLevel(SchedTreeA)
+ != DFSResult->getSubtreeLevel(SchedTreeB)) {
+ return DFSResult->getSubtreeLevel(SchedTreeA)
+ < DFSResult->getSubtreeLevel(SchedTreeB);
+ }
+ }
+ if (MaximizeILP)
+ return DFSResult->getILP(A) < DFSResult->getILP(B);
+ else
+ return DFSResult->getILP(A) > DFSResult->getILP(B);
+ }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+ ScheduleDAGMILive *DAG;
+ ILPOrder Cmp;
+
+ std::vector<SUnit*> ReadyQ;
+public:
+ ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
+
+ void initialize(ScheduleDAGMI *dag) override {
+ assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
+ DAG = static_cast<ScheduleDAGMILive*>(dag);
+ DAG->computeDFSResult();
+ Cmp.DFSResult = DAG->getDFSResult();
+ Cmp.ScheduledTrees = &DAG->getScheduledTrees();
+ ReadyQ.clear();
+ }
+
+ void registerRoots() override {
+ // Restore the heap in ReadyQ with the updated DFS results.
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ /// Callback to select the highest priority node from the ready Q.
+ SUnit *pickNode(bool &IsTopNode) override {
+ if (ReadyQ.empty()) return nullptr;
+ std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ SUnit *SU = ReadyQ.back();
+ ReadyQ.pop_back();
+ IsTopNode = false;
+ DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
+ << " ILP: " << DAG->getDFSResult()->getILP(SU)
+ << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+ << DAG->getDFSResult()->getSubtreeLevel(
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+ << "Scheduling " << *SU->getInstr());
+ return SU;
+ }
+
+ /// \brief Scheduler callback to notify that a new subtree is scheduled.
+ void scheduleTree(unsigned SubtreeID) override {
+ std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+
+ /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+ /// DFSResults, and resort the priority Q.
+ void schedNode(SUnit *SU, bool IsTopNode) override {
+ assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+ }
+
+ void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
+
+ void releaseBottomNode(SUnit *SU) override {
+ ReadyQ.push_back(SU);
+ std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+ "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+ "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+  // gives nodes with a higher number higher priority, causing the latest
+  // instructions to be scheduled first.
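+  // Illustrative example: with SU(3) and SU(7) both in TopQ, SU(7) sits at
+  // the top of the max-heap and is scheduled first.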
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ void initialize(ScheduleDAGMI*) override {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ SUnit *pickNode(bool &IsTopNode) override {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return nullptr;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ } else {
+ do {
+ if (BottomQ.empty()) return nullptr;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ void schedNode(SUnit *SU, bool IsTopNode) override {}
+
+ void releaseTopNode(SUnit *SU) override {
+ TopQ.push(SU);
+ }
+ void releaseBottomNode(SUnit *SU) override {
+ BottomQ.push(SU);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
+}
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMILive.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+ ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ if (ViewMISchedCutoff == 0)
+ return false;
+ return (Node->Preds.size() > ViewMISchedCutoff
+ || Node->Succs.size() > ViewMISchedCutoff);
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+ static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+ std::string Str;
+ raw_string_ostream SS(Str);
+ const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
+ const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
+ SS << "SU:" << SU->NodeNum;
+ if (DFS)
+ SS << " I:" << DFS->getNumInstrs(SU);
+ return SS.str();
+ }
+ static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+ }
+
+ static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
+ std::string Str("shape=Mrecord");
+ const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
+ const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
+ if (DFS) {
+ Str += ",style=filled,fillcolor=\"#";
+ Str += DOT::getColorString(DFS->getSubtreeID(N));
+ Str += '"';
+ }
+ return Str;
+ }
+};
+} // namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 000000000000..571a5c1d8005
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,857 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-sink"
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+UseBlockFreqInfo("machine-sink-bfi",
+ cl::desc("Use block frequency info to find successors to sink"),
+ cl::init(true), cl::Hidden);
+
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+
+namespace {
+ class MachineSinking : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI; // Machine register information
+ MachineDominatorTree *DT; // Machine dominator tree
+ MachinePostDominatorTree *PDT; // Machine post dominator tree
+ MachineLoopInfo *LI;
+ const MachineBlockFrequencyInfo *MBFI;
+ AliasAnalysis *AA;
+
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+    // Remember which edges we are about to split. Unlike CEBCandidates,
+    // these edges will actually be split.
+ SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
+
+ SparseBitVector<> RegsToClearKillFlags;
+
+ typedef std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>
+ AllSuccsCache;
+
+ public:
+ static char ID; // Pass identification
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ if (UseBlockFreqInfo)
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ }
+
+ void releaseMemory() override {
+ CEBCandidates.clear();
+ }
+
+ private:
+ bool ProcessBlock(MachineBasicBlock &MBB);
+ bool isWorthBreakingCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ /// \brief Postpone the splitting of the given critical
+ /// edge (\p From, \p To).
+ ///
+ /// We do not split the edges on the fly. Indeed, this invalidates
+ /// the dominance information and thus triggers a lot of updates
+ /// of that information underneath.
+    /// Instead, we postpone all the splits until after each iteration of
+    /// the main loop. That way, the information is at least valid
+ /// for the lifetime of an iteration.
+ ///
+ /// \return True if the edge is marked as toSplit, false otherwise.
+ /// False can be returned if, for instance, this is not profitable.
+ bool PostponeSplitCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
+ bool SinkInstruction(MachineInstr &MI, bool &SawStore,
+ AllSuccsCache &AllSuccessors);
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo,
+ AllSuccsCache &AllSuccessors);
+
+ bool PerformTrivialForwardCoalescing(MachineInstr &MI,
+ MachineBasicBlock *MBB);
+
+ SmallVector<MachineBasicBlock *, 4> &
+ GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
+ AllSuccsCache &AllSuccessors) const;
+ };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+char &llvm::MachineSinkingID = MachineSinking::ID;
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+
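+/// Fold a trivial forward COPY: when MI is "%dst = COPY %src" between virtual
+/// registers of the same class and the COPY is the only non-debug use of
+/// %src, rewrite %dst to %src and erase the COPY. Illustrative example:
+///   %vreg1 = ADD %vreg2, %vreg3
+///   %vreg0 = COPY %vreg1   ; only non-debug use of %vreg1
+/// After coalescing, users of %vreg0 read %vreg1 directly and the COPY is
+/// removed.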
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
+ MachineBasicBlock *MBB) {
+ if (!MI.isCopy())
+ return false;
+
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI.eraseFromParent();
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ MRI->clearKillFlags(SrcReg);
+
+ ++NumCoalesces;
+ return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false, since it is never legal to move a def
+/// after its uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Only makes sense for vregs");
+
+ // Ignore debug uses because debug info doesn't affect the code.
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
+  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MachineInstr *UseInst = MO.getParent();
+ unsigned OpNo = &MO - &UseInst->getOperand(0);
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = MO.getParent();
+ unsigned OpNo = &MO - &UseInst->getOperand(0);
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseInst->isPHI()) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ UseBlock = UseInst->getOperand(OpNo+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
+ }
+
+ // Check that it dominates.
+ if (!DT->dominates(MBB, UseBlock))
+ return false;
+ }
+
+ return true;
+}
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ PDT = &getAnalysis<MachinePostDominatorTree>();
+ LI = &getAnalysis<MachineLoopInfo>();
+ MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ CEBCandidates.clear();
+ ToSplit.clear();
+ for (auto &MBB: MF)
+ MadeChange |= ProcessBlock(MBB);
+
+ // If we have anything we marked as toSplit, split it now.
+ for (auto &Pair : ToSplit) {
+ auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
+ if (NewSucc != nullptr) {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << Pair.first->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << Pair.second->getNumber() << '\n');
+ MadeChange = true;
+ ++NumSplit;
+ } else
+ DEBUG(dbgs() << " *** Not legal to break critical edge\n");
+ }
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+
+ // Now clear any kill flags for recorded registers.
+ for (auto I : RegsToClearKillFlags)
+ MRI->clearKillFlags(I);
+ RegsToClearKillFlags.clear();
+
+ return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+  // Can't sink anything out of a block that has fewer than two successors.
+ if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&MBB)) return false;
+
+ bool MadeChange = false;
+
+ // Cache all successors, sorted by frequency info and loop depth.
+ AllSuccsCache AllSuccessors;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ MachineBasicBlock::iterator I = MBB.end();
+ --I;
+ bool ProcessedBegin, SawStore = false;
+ do {
+ MachineInstr &MI = *I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == MBB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (MI.isDebugValue())
+ continue;
+
+ bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
+ if (Joined) {
+ MadeChange = true;
+ continue;
+ }
+
+ if (SinkInstruction(MI, SawStore, AllSuccessors)) {
+ ++NumSunk;
+ MadeChange = true;
+ }
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
+
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)).second)
+ return true;
+
+ if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
+ return true;
+
+ // MI is cheap, we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
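+  // Illustrative case: a cheap "%vreg2 = ADD %vreg1, 8" that is the only user
+  // of %vreg1, where %vreg1 is defined in this same block, is worth the split
+  // because both instructions can then be sunk together.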
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // We don't move live definitions of physical registers,
+ // so sinking their uses won't enable any opportunities.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ // If this instruction is the only user of a virtual register,
+ // check if breaking the edge will enable sinking
+ // both this instruction and the defining instruction.
+ if (MRI->hasOneNonDBGUse(Reg)) {
+      // If the definition resides in the same MBB, it's likely we can sink
+      // these together. If the definition resides elsewhere, we aren't
+      // blocking it from being sunk, so don't break the edge.
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() == MI.getParent())
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return false;
+
+  // Avoid breaking a back edge. From == To means a backedge for a
+  // single-BB loop.
+ if (!SplitEdges || FromBB == ToBB)
+ return false;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
+ return false;
+
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+  // flow. We need to ensure that the new basic block to which the computation
+  // is sunk dominates all the uses.
+  // It's only legal to break the critical edge and sink the computation to
+  // the new block if all the predecessors of "To", except for "From", are
+  // not dominated by "From". Given the SSA property, this means these
+  // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return false;
+ }
+ }
+
+ ToSplit.insert(std::make_pair(FromBB, ToBB));
+
+ return true;
+}
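+// Note on the dominance check above: beyond the backedge tests, the
+// FromBB -> ToBB edge is only recorded in ToSplit when every predecessor of
+// ToBB other than FromBB is dominated by ToBB itself, so the block inserted on
+// the split edge will dominate all uses of the sunk value. When all uses are
+// PHI nodes (BreakPHIEdge), that check is skipped because PHI sources are only
+// read on their specific predecessor edge.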
+
+/// collectDebugValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &DbgValues) {
+ DbgValues.clear();
+ if (!MI.getOperand(0).isReg())
+ return;
+
+ MachineBasicBlock::iterator DI = MI; ++DI;
+ for (MachineBasicBlock::iterator DE = MI.getParent()->end();
+ DI != DE; ++DI) {
+ if (!DI->isDebugValue())
+ return;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+ DbgValues.push_back(&*DI);
+ }
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo,
+ AllSuccsCache &AllSuccessors) {
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
+ return false;
+
+ // It is profitable if SuccToSinkTo does not post dominate current block.
+ if (!PDT->dominates(SuccToSinkTo, MBB))
+ return true;
+
+ // It is profitable to sink an instruction from a deeper loop to a shallower
+ // loop, even if the latter post-dominates the former (PR21115).
+ if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+ return true;
+
+  // Check whether all uses in the post-dominated block are PHI instructions.
+ bool NonPHIUse = false;
+ for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
+ MachineBasicBlock *UseBlock = UseInst.getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+  // Even if SuccToSinkTo post dominates MBB, sinking may still be profitable
+  // if MI can be profitably sunk further into another block in a later round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding successor is compile time expensive then cache results.
+ if (MachineBasicBlock *MBB2 =
+ FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
+
+  // If SuccToSinkTo is the final destination and it post-dominates the current
+  // block, it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
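+// Summary of the tests above: sinking is considered profitable when
+// SuccToSinkTo does not post-dominate MBB, when it sits at a shallower loop
+// depth, when its only uses of Reg are PHI nodes, or when MI could be sunk
+// onward from SuccToSinkTo in a later round; a post-dominating final
+// destination is rejected.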
+
+/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
+/// computing it if it was not already cached.
+SmallVector<MachineBasicBlock *, 4> &
+MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
+ AllSuccsCache &AllSuccessors) const {
+
+ // Do we have the sorted successors in cache ?
+ auto Succs = AllSuccessors.find(MBB);
+ if (Succs != AllSuccessors.end())
+ return Succs->second;
+
+ SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ // Handle cases where sinking can happen but where the sink point isn't a
+ // successor. For example:
+ //
+ // x = computation
+ // if () {} else {}
+ // use x
+ //
+ const std::vector<MachineDomTreeNode *> &Children =
+ DT->getNode(MBB)->getChildren();
+ for (const auto &DTChild : Children)
+ // DomTree children of MBB that have MBB as immediate dominator are added.
+ if (DTChild->getIDom()->getBlock() == MI.getParent() &&
+ // Skip MBBs already added to the AllSuccs vector above.
+ !MBB->isSuccessor(DTChild->getBlock()))
+ AllSuccs.push_back(DTChild->getBlock());
+
+ // Sort Successors according to their loop depth or block frequency info.
+ std::stable_sort(
+ AllSuccs.begin(), AllSuccs.end(),
+ [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
+ uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
+ bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+ return HasBlockFreq ? LHSFreq < RHSFreq
+ : LI->getLoopDepth(L) < LI->getLoopDepth(R);
+ });
+
+ auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
+
+ return it.first->second;
+}
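+// Example of the resulting order: with block frequency info available, a
+// successor with frequency 100 sorts before one with frequency 1000; if either
+// frequency is 0, the comparison falls back to loop depth, so a depth-0
+// successor sorts before a depth-2 one.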
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *
+MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge,
+ AllSuccsCache &AllSuccessors) {
+ assert (MBB && "Invalid MachineBasicBlock!");
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ MachineBasicBlock *SuccToSinkTo = nullptr;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue; // Ignore non-register operands.
+
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ return nullptr;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead. We can't move it.
+ return nullptr;
+ }
+ } else {
+ // Virtual register uses are always safe to sink.
+ if (MO.isUse()) continue;
+
+ // If it's not safe to move defs of the register class, then abort.
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+ return nullptr;
+
+ // Virtual register defs can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ if (SuccToSinkTo) {
+ // If a previous operand picked a block to sink to, then this operand
+ // must be sinkable to the same block.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
+ BreakPHIEdge, LocalUse))
+ return nullptr;
+
+ continue;
+ }
+
+ // Otherwise, we should look at all the successors and decide which one
+ // we should sink to. If we have reliable block frequency information
+ // (frequency != 0) available, give successors with smaller frequencies
+ // higher priority, otherwise prioritize smaller loop depths.
+ for (MachineBasicBlock *SuccBlock :
+ GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
+ BreakPHIEdge, LocalUse)) {
+ SuccToSinkTo = SuccBlock;
+ break;
+ }
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return nullptr;
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (!SuccToSinkTo)
+ return nullptr;
+ if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
+ return nullptr;
+ }
+ }
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return nullptr;
+
+ // It's not safe to sink instructions to EH landing pad. Control flow into
+ // landing pad is implicitly defined.
+ if (SuccToSinkTo && SuccToSinkTo->isEHPad())
+ return nullptr;
+
+ return SuccToSinkTo;
+}
+
+/// \brief Return true if MI is likely to be usable as a memory operation by the
+/// implicit null check optimization.
+///
+/// This is a "best effort" heuristic, and should not be relied upon for
+/// correctness. This returning true does not guarantee that the implicit null
+/// check optimization is legal over MI, and this returning false does not
+/// guarantee MI cannot possibly be used to do a null check.
+static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+
+ auto *MBB = MI.getParent();
+ if (MBB->pred_size() != 1)
+ return false;
+
+ auto *PredMBB = *MBB->pred_begin();
+ auto *PredBB = PredMBB->getBasicBlock();
+
+ // Frontends that don't use implicit null checks have no reason to emit
+ // branches with make.implicit metadata, and this function should always
+ // return false for them.
+ if (!PredBB ||
+ !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
+ return false;
+
+ unsigned BaseReg;
+ int64_t Offset;
+ if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ return false;
+
+ if (!(MI.mayLoad() && !MI.isPredicable()))
+ return false;
+
+ MachineBranchPredicate MBP;
+ if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
+ return false;
+
+ return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+ (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+ MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
+ MBP.LHS.getReg() == BaseReg;
+}
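+// Shape of the pattern matched above: MBB has a single predecessor whose IR
+// terminator carries make.implicit metadata and branches on BaseReg compared
+// (EQ or NE) against 0, and MI is a non-predicable load addressed off that
+// same BaseReg. Sinking such a load could keep the implicit-null-check pass
+// from folding it into the null test.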
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
+ AllSuccsCache &AllSuccessors) {
+ // Don't sink instructions that the target prefers not to sink.
+ if (!TII->shouldSink(MI))
+ return false;
+
+ // Check if it's safe to move the instruction.
+ if (!MI.isSafeToMove(AA, SawStore))
+ return false;
+
+ // Convergent operations may not be made control-dependent on additional
+ // values.
+ if (MI.isConvergent())
+ return false;
+
+ // Don't break implicit null checks. This is a performance heuristic, and not
+ // required for correctness.
+ if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI.getParent();
+ MachineBasicBlock *SuccToSinkTo =
+ FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
+
+  // FindSuccToSinkTo returns null when there is nowhere suitable to sink to,
+  // e.g. when MI defines no sinkable virtual register.
+ if (!SuccToSinkTo)
+ return false;
+
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);
+
+ // If the block has multiple predecessors, this is a critical edge.
+ // Decide if we can sink along it or need to break the edge.
+ if (SuccToSinkTo->pred_size() > 1) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ bool TryBreak = false;
+ bool store = true;
+ if (!MI.isSafeToMove(AA, store)) {
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations to new code paths.
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
+ }
+
+ // Don't sink instructions into a loop.
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ // Mark this edge as to be split.
+ // If the edge can actually be split, the next iteration of the main loop
+ // will sink MI in the newly created block.
+ bool Status =
+ PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ // The instruction will not be sunk this time.
+ return false;
+ }
+ }
+
+ if (BreakPHIEdge) {
+    // BreakPHIEdge is true if all the uses are in the successor MBB being
+    // sunk into and they are all PHI nodes. In this case, machine-sink must
+    // break the critical edge first.
+ bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!Status)
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ // The instruction will not be sunk this time.
+ return false;
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+ ++InsertPos;
+
+  // Collect matching debug values.
+ SmallVector<MachineInstr *, 2> DbgValuesToSink;
+ collectDebugValues(MI, DbgValuesToSink);
+
+ // Move the instruction.
+ SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+ ++MachineBasicBlock::iterator(MI));
+
+ // Move debug values.
+ for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
+ DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
+ MachineInstr *DbgMI = *DBI;
+ SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
+ ++MachineBasicBlock::iterator(DbgMI));
+ }
+
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
+ // Note that we have to clear the kill flags for any register this instruction
+ // uses as we may sink over another instruction which currently kills the
+ // used registers.
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse())
+ RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
+ }
+
+ return true;
+}
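+// Outline of SinkInstruction above: after the early bail-outs (target
+// preference, safety, convergence, implicit null checks), pick a destination
+// with FindSuccToSinkTo, refuse if MI defines a physical register live into
+// that block, postpone via PostponeSplitCriticalEdge when a critical edge must
+// be split, and otherwise splice MI and its trailing DBG_VALUEs in front of
+// the first non-PHI instruction of the destination, recording MI's used
+// registers so their kill flags can be cleared.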
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
new file mode 100644
index 000000000000..86332c8a93a5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -0,0 +1,1328 @@
+//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "machine-trace-metrics"
+
+char MachineTraceMetrics::ID = 0;
+char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
+
+INITIALIZE_PASS_BEGIN(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineTraceMetrics,
+ "machine-trace-metrics", "Machine Trace Metrics", false, true)
+
+MachineTraceMetrics::MachineTraceMetrics()
+ : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
+ MRI(nullptr), Loops(nullptr) {
+ std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
+}
+
+void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
+ BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
+ return false;
+}
+
+void MachineTraceMetrics::releaseMemory() {
+ MF = nullptr;
+ BlockInfo.clear();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i) {
+ delete Ensembles[i];
+ Ensembles[i] = nullptr;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Fixed block information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+ assert(MBB && "No basic block");
+ FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+ if (FBI->hasResources())
+ return FBI;
+
+ // Compute resource usage in the block.
+ FBI->HasCalls = false;
+ unsigned InstrCount = 0;
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
+ for (const auto &MI : *MBB) {
+ if (MI.isTransient())
+ continue;
+ ++InstrCount;
+ if (MI.isCall())
+ FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
+ }
+ FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
+ return FBI;
+}
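+// The per-block numbers stored above are pre-scaled: the entry
+// ProcResourceCycles[MBB->getNumber() * PRKinds + K] holds the cycles spent on
+// resource kind K multiplied by getResourceFactor(K), so usage of different
+// resource kinds can be compared and summed directly. getCycles() is used
+// later to convert a scaled total back into a cycle count.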
+
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+ : MTM(*ct) {
+ BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+ return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources from trace above. The top block is simple.
+ if (!TBI->Pred) {
+ TBI->InstrDepth = 0;
+ TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
+ return;
+ }
+
+ // Compute from the block above. A post-order traversal ensures the
+ // predecessor is always computed first.
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
+ assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+ const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+ TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+ TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+ TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+
+ // Compute resources for the current block.
+ TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
+
+ // The trace tail is done.
+ if (!TBI->Succ) {
+ TBI->Tail = MBB->getNumber();
+ std::copy(PRCycles.begin(), PRCycles.end(),
+ ProcResourceHeights.begin() + PROffset);
+ return;
+ }
+
+  // Compute from the block below. A post-order traversal ensures the
+  // successor is always computed first.
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
+ assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+ TBI->InstrHeight += SuccTBI->InstrHeight;
+ TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidDepth() ? TBI : nullptr;
+}
+
+// Check if height resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getHeightResources(const MachineBasicBlock *MBB) const {
+ const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ return TBI->hasValidHeight() ? TBI : nullptr;
+}
+
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
+}
+
+//===----------------------------------------------------------------------===//
+// Trace Selection Strategies
+//===----------------------------------------------------------------------===//
+//
+// A trace selection strategy is implemented as a sub-class of Ensemble. The
+// trace through a block B is computed by two DFS traversals of the CFG
+// starting from B. One upwards, and one downwards. During the upwards DFS,
+// pickTracePred() is called on the post-ordered blocks. During the downwards
+// DFS, pickTraceSucc() is called in a post-order.
+//
+
+// We never allow traces that leave loops, but we do allow traces to enter
+// nested loops. We also never allow traces to contain back-edges.
+//
+// This means that a loop header can never appear above the center block of a
+// trace, except as the trace head. Below the center block, loop exiting edges
+// are banned.
+//
+// Return true if an edge from the From loop to the To loop is leaving a loop.
+// Either of To and From can be null.
+static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
+ return From && !From->contains(To);
+}
+
+// MinInstrCountEnsemble - Pick the trace that executes the least number of
+// instructions.
+namespace {
+class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const override { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
+
+public:
+ MinInstrCountEnsemble(MachineTraceMetrics *mtm)
+ : MachineTraceMetrics::Ensemble(mtm) {}
+};
+}
+
+// Select the preferred predecessor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return nullptr;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ // Don't leave loops, and never follow back-edges.
+ if (CurLoop && MBB == CurLoop->getHeader())
+ return nullptr;
+ unsigned CurCount = MTM.getResources(MBB)->InstrCount;
+ const MachineBasicBlock *Best = nullptr;
+ unsigned BestDepth = 0;
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ const MachineTraceMetrics::TraceBlockInfo *PredTBI =
+ getDepthResources(Pred);
+ // Ignore cycles that aren't natural loops.
+ if (!PredTBI)
+ continue;
+ // Pick the predecessor that would give this block the smallest InstrDepth.
+ unsigned Depth = PredTBI->InstrDepth + CurCount;
+ if (!Best || Depth < BestDepth) {
+ Best = Pred;
+ BestDepth = Depth;
+ }
+ }
+ return Best;
+}
+
+// Select the preferred successor for MBB.
+const MachineBasicBlock*
+MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
+ if (MBB->pred_empty())
+ return nullptr;
+ const MachineLoop *CurLoop = getLoopFor(MBB);
+ const MachineBasicBlock *Best = nullptr;
+ unsigned BestHeight = 0;
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ // Don't consider back-edges.
+ if (CurLoop && Succ == CurLoop->getHeader())
+ continue;
+ // Don't consider successors exiting CurLoop.
+ if (isExitingLoop(CurLoop, getLoopFor(Succ)))
+ continue;
+ const MachineTraceMetrics::TraceBlockInfo *SuccTBI =
+ getHeightResources(Succ);
+ // Ignore cycles that aren't natural loops.
+ if (!SuccTBI)
+ continue;
+ // Pick the successor that would give this block the smallest InstrHeight.
+ unsigned Height = SuccTBI->InstrHeight;
+ if (!Best || Height < BestHeight) {
+ Best = Succ;
+ BestHeight = Height;
+ }
+ }
+ return Best;
+}
+
+// Get an Ensemble sub-class for the requested trace strategy.
+MachineTraceMetrics::Ensemble *
+MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
+ assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[strategy];
+ if (E)
+ return E;
+
+ // Allocate new Ensemble on demand.
+ switch (strategy) {
+ case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ default: llvm_unreachable("Invalid trace strategy enum");
+ }
+}
+
+void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n');
+ BlockInfo[MBB->getNumber()].invalidate();
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->invalidate(MBB);
+}
+
+void MachineTraceMetrics::verifyAnalysis() const {
+ if (!MF)
+ return;
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size");
+ for (unsigned i = 0; i != TS_NumStrategies; ++i)
+ if (Ensembles[i])
+ Ensembles[i]->verify();
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Trace building
+//===----------------------------------------------------------------------===//
+//
+// Traces are built by two CFG traversals. To avoid recomputing too much, use a
+// set abstraction that confines the search to the current loop, and doesn't
+// revisit blocks.
+
+namespace {
+struct LoopBounds {
+ MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
+ SmallPtrSet<const MachineBasicBlock*, 8> Visited;
+ const MachineLoopInfo *Loops;
+ bool Downward;
+ LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
+ const MachineLoopInfo *loops)
+ : Blocks(blocks), Loops(loops), Downward(false) {}
+};
+}
+
+// Specialize po_iterator_storage in order to prune the post-order traversal so
+// it is limited to the current loop and doesn't traverse the loop back edges.
+namespace llvm {
+template<>
+class po_iterator_storage<LoopBounds, true> {
+ LoopBounds &LB;
+public:
+ po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+ void finishPostorder(const MachineBasicBlock*) {}
+
+ bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ // Skip already visited To blocks.
+ MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
+ if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
+ return false;
+ // From is null once when To is the trace center block.
+ if (From) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ // Don't follow backedges, don't leave FromLoop when going upwards.
+ if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ return false;
+ // Don't leave FromLoop.
+ if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
+ return false;
+ }
+ }
+ // To is a new block. Mark the block as visited in case the CFG has cycles
+ // that MachineLoopInfo didn't recognize as a natural loop.
+ return LB.Visited.insert(To).second;
+ }
+};
+}
+
+/// Compute the trace through MBB.
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Computing " << getName() << " trace through BB#"
+ << MBB->getNumber() << '\n');
+ // Set up loop bounds for the backwards post-order traversal.
+ LoopBounds Bounds(BlockInfo, MTM.Loops);
+
+ // Run an upwards post-order search for the trace start.
+ Bounds.Downward = false;
+ Bounds.Visited.clear();
+ for (auto I : inverse_post_order_ext(MBB, Bounds)) {
+ DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the predecessors have been visited, pick the preferred one.
+ TBI.Pred = pickTracePred(I);
+ DEBUG({
+ if (TBI.Pred)
+ dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leading to I is now known, compute the depth resources.
+ computeDepthResources(I);
+ }
+
+ // Run a downwards post-order search for the trace end.
+ Bounds.Downward = true;
+ Bounds.Visited.clear();
+ for (auto I : post_order_ext(MBB, Bounds)) {
+ DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
+ TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
+ // All the successors have been visited, pick the preferred one.
+ TBI.Succ = pickTraceSucc(I);
+ DEBUG({
+ if (TBI.Succ)
+ dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
+ else
+ dbgs() << "null\n";
+ });
+ // The trace leaving I is now known, compute the height resources.
+ computeHeightResources(I);
+ }
+}
+
+/// Invalidate traces through BadMBB.
+void
+MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
+ SmallVector<const MachineBasicBlock*, 16> WorkList;
+ TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()];
+
+ // Invalidate height resources of blocks above MBB.
+ if (BadTBI.hasValidHeight()) {
+ BadTBI.invalidateHeight();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " height.\n");
+ // Find any MBB predecessors that have MBB as their preferred successor.
+ // They are the only ones that need to be invalidated.
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
+ if (!TBI.hasValidHeight())
+ continue;
+ if (TBI.Succ == MBB) {
+ TBI.invalidateHeight();
+ WorkList.push_back(Pred);
+ continue;
+ }
+        // Verify that TBI.Succ is actually a successor of Pred.
+ assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Invalidate depth resources of blocks below MBB.
+ if (BadTBI.hasValidDepth()) {
+ BadTBI.invalidateDepth();
+ WorkList.push_back(BadMBB);
+ do {
+ const MachineBasicBlock *MBB = WorkList.pop_back_val();
+ DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName()
+ << " depth.\n");
+ // Find any MBB successors that have MBB as their preferred predecessor.
+ // They are the only ones that need to be invalidated.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
+ if (!TBI.hasValidDepth())
+ continue;
+ if (TBI.Pred == MBB) {
+ TBI.invalidateDepth();
+ WorkList.push_back(Succ);
+ continue;
+ }
+        // Verify that TBI.Pred is actually a predecessor of Succ.
+ assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
+ }
+ } while (!WorkList.empty());
+ }
+
+ // Clear any per-instruction data. We only have to do this for BadMBB itself
+ // because the instructions in that block may change. Other blocks may be
+ // invalidated, but their instructions will stay the same, so there is no
+ // need to erase the Cycle entries. They will be overwritten when we
+ // recompute.
+ for (const auto &I : *BadMBB)
+ Cycles.erase(&I);
+}
+
+void MachineTraceMetrics::Ensemble::verify() const {
+#ifndef NDEBUG
+ assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() &&
+ "Outdated BlockInfo size");
+ for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) {
+ const TraceBlockInfo &TBI = BlockInfo[Num];
+ if (TBI.hasValidDepth() && TBI.Pred) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() &&
+ "Trace is broken, depth should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge");
+ }
+ if (TBI.hasValidHeight() && TBI.Succ) {
+ const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num);
+ assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace");
+ assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() &&
+ "Trace is broken, height should have been invalidated.");
+ const MachineLoop *Loop = getLoopFor(MBB);
+ const MachineLoop *SuccLoop = getLoopFor(TBI.Succ);
+ assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) &&
+ "Trace contains backedge");
+ }
+ }
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Data Dependencies
+//===----------------------------------------------------------------------===//
+//
+// Compute the depth and height of each instruction based on data dependencies
+// and instruction latencies. These cycle numbers assume that the CPU can issue
+// an infinite number of instructions per cycle as long as their dependencies
+// are ready.
+
+// A data dependency is represented as a defining MI and operand numbers on the
+// defining and using MI.
+namespace {
+struct DataDep {
+ const MachineInstr *DefMI;
+ unsigned DefOp;
+ unsigned UseOp;
+
+ DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp)
+ : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {}
+
+ /// Create a DataDep from an SSA form virtual register.
+ DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
+ : UseOp(UseOp) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
+ assert(!DefI.atEnd() && "Register has no defs");
+ DefMI = DefI->getParent();
+ DefOp = DefI.getOperandNo();
+ assert((++DefI).atEnd() && "Register has multiple defs");
+ }
+};
+}
+
+// Get the input data dependencies that must be ready before UseMI can issue.
+// Return true if UseMI has any physreg operands.
+static bool getDataDeps(const MachineInstr &UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineRegisterInfo *MRI) {
+ // Debug values should not be included in any calculations.
+ if (UseMI.isDebugValue())
+ return false;
+
+ bool HasPhysRegs = false;
+ for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
+ E = UseMI.operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ HasPhysRegs = true;
+ continue;
+ }
+ // Collect virtual register reads.
+ if (MO.readsReg())
+ Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
+ }
+ return HasPhysRegs;
+}
+
+// Get the input data dependencies of a PHI instruction, using Pred as the
+// preferred predecessor.
+// This will add at most one dependency to Deps.
+static void getPHIDeps(const MachineInstr &UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ const MachineBasicBlock *Pred,
+ const MachineRegisterInfo *MRI) {
+ // No predecessor at the beginning of a trace. Ignore dependencies.
+ if (!Pred)
+ return;
+ assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
+ for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
+ if (UseMI.getOperand(i + 1).getMBB() == Pred) {
+ unsigned Reg = UseMI.getOperand(i).getReg();
+ Deps.push_back(DataDep(MRI, Reg, i));
+ return;
+ }
+ }
+}
+
+// Keep track of physreg data dependencies by recording each live register unit.
+// Associate each regunit with an instruction operand. Depending on the
+// direction instructions are scanned, it could be the operand that defined the
+// regunit, or the highest operand to read the regunit.
+namespace {
+struct LiveRegUnit {
+ unsigned RegUnit;
+ unsigned Cycle;
+ const MachineInstr *MI;
+ unsigned Op;
+
+ unsigned getSparseSetIndex() const { return RegUnit; }
+
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
+};
+}
+
+// Identify physreg dependencies for UseMI, and update the live regunit
+// tracking set when scanning instructions downwards.
+static void updatePhysDepsDownwards(const MachineInstr *UseMI,
+ SmallVectorImpl<DataDep> &Deps,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> Kills;
+ SmallVector<unsigned, 8> LiveDefOps;
+
+ for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
+ ME = UseMI->operands_end(); MI != ME; ++MI) {
+ const MachineOperand &MO = *MI;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ // Track live defs and kills for updating RegUnits.
+ if (MO.isDef()) {
+ if (MO.isDead())
+ Kills.push_back(Reg);
+ else
+ LiveDefOps.push_back(UseMI->getOperandNo(MI));
+ } else if (MO.isKill())
+ Kills.push_back(Reg);
+ // Identify dependencies.
+ if (!MO.readsReg())
+ continue;
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
+ break;
+ }
+ }
+
+ // Update RegUnits to reflect live registers after UseMI.
+ // First kills.
+ for (unsigned Kill : Kills)
+ for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
+ RegUnits.erase(*Units);
+
+ // Second, live defs.
+ for (unsigned DefOp : LiveDefOps) {
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ LRU.MI = UseMI;
+ LRU.Op = DefOp;
+ }
+ }
+}
+
+/// The length of the critical path through a trace is the maximum of two path
+/// lengths:
+///
+/// 1. The maximum height+depth over all instructions in the trace center block.
+///
+/// 2. The longest cross-block dependency chain. For small blocks, it is
+/// possible that the critical path through the trace doesn't include any
+/// instructions in the block.
+///
+/// This function computes the second number from the live-in list of the
+/// center block.
+unsigned MachineTraceMetrics::Ensemble::
+computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
+ assert(TBI.HasValidInstrDepths && "Missing depth info");
+ assert(TBI.HasValidInstrHeights && "Missing height info");
+ unsigned MaxLen = 0;
+ for (const LiveInReg &LIR : TBI.LiveIns) {
+ if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ continue;
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ // Ignore dependencies outside the current trace.
+ const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
+ if (!DefTBI.isUsefulDominator(TBI))
+ continue;
+ unsigned Len = LIR.Height + Cycles[DefMI].Depth;
+ MaxLen = std::max(MaxLen, Len);
+ }
+ return MaxLen;
+}
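+// Put another way, the value computed above is the maximum over the trace
+// live-in virtual registers r of (Height(r) + Depth(def(r))), restricted to
+// definitions whose block is a useful dominator within this trace;
+// physical-register live-ins are skipped.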
+
+/// Compute instruction depths for all instructions above or in MBB in its
+/// trace. This assumes that the trace through MBB has already been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrDepths(const MachineBasicBlock *MBB) {
+ // The top of the trace may already be computed, and HasValidInstrDepths
+ // implies Head->HasValidInstrDepths, so we only need to start from the first
+ // block in the trace that needs to be recomputed.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidDepth() && "Incomplete trace");
+ if (TBI.HasValidInstrDepths)
+ break;
+ Stack.push_back(MBB);
+ MBB = TBI.Pred;
+ } while (MBB);
+
+ // FIXME: If MBB is non-null at this point, it is the last pre-computed block
+ // in the trace. We should track any live-out physregs that were defined in
+ // the trace. This is quite rare in SSA form, typically created by CSE
+ // hoisting a compare.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // Go through trace blocks in top-down order, stopping after the center block.
+ SmallVector<DataDep, 8> Deps;
+ while (!Stack.empty()) {
+ MBB = Stack.pop_back_val();
+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrDepths = true;
+ TBI.CriticalPath = 0;
+
+ // Print out resource depths here as well.
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Also compute the critical path length through MBB when possible.
+ if (TBI.HasValidInstrHeights)
+ TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
+
+ for (const auto &UseMI : *MBB) {
+ // Collect all data dependencies.
+ Deps.clear();
+ if (UseMI.isPHI())
+ getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
+
+ // Filter and process dependencies, computing the earliest issue cycle.
+ unsigned Cycle = 0;
+ for (const DataDep &Dep : Deps) {
+        const TraceBlockInfo &DepTBI =
+ BlockInfo[Dep.DefMI->getParent()->getNumber()];
+ // Ignore dependencies from outside the current trace.
+ if (!DepTBI.isUsefulDominator(TBI))
+ continue;
+ assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
+ unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += MTM.SchedModel
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
+ Cycle = std::max(Cycle, DepCycle);
+ }
+ // Remember the instruction depth.
+ InstrCycles &MICycles = Cycles[&UseMI];
+ MICycles.Depth = Cycle;
+
+ if (!TBI.HasValidInstrHeights) {
+ DEBUG(dbgs() << Cycle << '\t' << UseMI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
+ }
+ }
+}
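+// Net effect of the loop above: for each instruction, Cycles[MI].Depth becomes
+// the maximum over its in-trace data dependencies of the defining
+// instruction's depth plus the operand latency (transient defs contribute
+// latency 0), and TBI.CriticalPath is raised to depth + height whenever the
+// instruction heights are already known.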
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
+ SparseSet<LiveRegUnit> &RegUnits,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 8> ReadOps;
+
+ for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand &MO = *MOI;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO.readsReg())
+ ReadOps.push_back(MI.getOperandNo(MOI));
+ if (!MO.isDef())
+ continue;
+ // This is a def of Reg. Remove corresponding entries from RegUnits, and
+ // update MI Height to consider the physreg dependencies.
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ if (I == RegUnits.end())
+ continue;
+ unsigned DepHeight = I->Cycle;
+ if (!MI.isTransient()) {
+ // We may not know the UseMI of this dependency, if it came from the
+ // live-in list. SchedModel can handle a NULL UseMI.
+ DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
+ I->MI, I->Op);
+ }
+ Height = std::max(Height, DepHeight);
+ // This regunit is dead above MI.
+ RegUnits.erase(I);
+ }
+ }
+
+ // Now we know the height of MI. Update any regunits read.
+ for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+ unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ LiveRegUnit &LRU = RegUnits[*Units];
+ // Set the height to the highest reader of the unit.
+ if (LRU.Cycle <= Height && LRU.MI != &MI) {
+ LRU.Cycle = Height;
+ LRU.MI = &MI;
+ LRU.Op = ReadOps[i];
+ }
+ }
+ }
+
+ return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
+ unsigned UseHeight, MIHeightMap &Heights,
+ const TargetSchedModel &SchedModel,
+ const TargetInstrInfo *TII) {
+ // Adjust height by Dep.DefMI latency.
+ if (!Dep.DefMI->isTransient())
+ UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
+ Dep.UseOp);
+
+ // Update Heights[DefMI] to be the maximum height seen.
+ MIHeightMap::iterator I;
+ bool New;
+ std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ if (New)
+ return true;
+
+ // DefMI has been pushed before. Give it the max height.
+ if (I->second < UseHeight)
+ I->second = UseHeight;
+ return false;
+}
+
+/// Assuming that the virtual register defined by DefMI:DefOp was used by
+/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop
+/// when reaching the block that contains DefMI.
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
+ ArrayRef<const MachineBasicBlock*> Trace) {
+ assert(!Trace.empty() && "Trace should contain at least one block");
+ unsigned Reg = DefMI->getOperand(DefOp).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+ // Reg is live-in to all blocks in Trace that follow DefMBB.
+ for (unsigned i = Trace.size(); i; --i) {
+ const MachineBasicBlock *MBB = Trace[i-1];
+ if (MBB == DefMBB)
+ return;
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ // Just add the register. The height will be updated later.
+ TBI.LiveIns.push_back(Reg);
+ }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+ // The bottom of the trace may already be computed.
+ // Find the blocks that need updating.
+ SmallVector<const MachineBasicBlock*, 8> Stack;
+ do {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ assert(TBI.hasValidHeight() && "Incomplete trace");
+ if (TBI.HasValidInstrHeights)
+ break;
+ Stack.push_back(MBB);
+ TBI.LiveIns.clear();
+ MBB = TBI.Succ;
+ } while (MBB);
+
+ // As we move upwards in the trace, keep track of instructions that are
+ // required by deeper trace instructions. Map MI -> height required so far.
+ MIHeightMap Heights;
+
+ // For physregs, the def isn't known when we see the use.
+ // Instead, keep track of the highest use of each regunit.
+ SparseSet<LiveRegUnit> RegUnits;
+ RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+ // If the bottom of the trace was already precomputed, initialize heights
+ // from its live-in list.
+ // MBB is the highest precomputed block in the trace.
+ if (MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ for (LiveInReg &LI : TBI.LiveIns) {
+ if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ // For virtual registers, the def latency is included.
+ unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+ if (Height < LI.Height)
+ Height = LI.Height;
+ } else {
+ // For register units, the def latency is not included because we don't
+ // know the def yet.
+ RegUnits[LI.Reg].Cycle = LI.Height;
+ }
+ }
+ }
+
+ // Go through the trace blocks in bottom-up order.
+ SmallVector<DataDep, 8> Deps;
+ for (;!Stack.empty(); Stack.pop_back()) {
+ MBB = Stack.back();
+ DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+ TBI.HasValidInstrHeights = true;
+ TBI.CriticalPath = 0;
+
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
+ // Get dependencies from PHIs in the trace successor.
+ const MachineBasicBlock *Succ = TBI.Succ;
+ // If MBB is the last block in the trace, and it has a back-edge to the
+ // loop header, get loop-carried dependencies from PHIs in the header. For
+ // that purpose, pretend that all the loop header PHIs have height 0.
+ if (!Succ)
+ if (const MachineLoop *Loop = getLoopFor(MBB))
+ if (MBB->isSuccessor(Loop->getHeader()))
+ Succ = Loop->getHeader();
+
+ if (Succ) {
+ for (const auto &PHI : *Succ) {
+ if (!PHI.isPHI())
+ break;
+ Deps.clear();
+ getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ if (!Deps.empty()) {
+ // Loop header PHI heights are all 0.
+ unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
+ if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
+ MTM.TII))
+ addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
+ }
+ }
+ }
+
+ // Go through the block backwards.
+ for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+ BI != BB;) {
+ const MachineInstr &MI = *--BI;
+
+ // Find the MI height as determined by virtual register uses in the
+ // trace below.
+ unsigned Cycle = 0;
+ MIHeightMap::iterator HeightI = Heights.find(&MI);
+ if (HeightI != Heights.end()) {
+ Cycle = HeightI->second;
+ // We won't be seeing any more MI uses.
+ Heights.erase(HeightI);
+ }
+
+ // Don't process PHI deps. They depend on the specific predecessor, and
+ // we'll get them when visiting the predecessor.
+ Deps.clear();
+ bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+ // There may also be regunit dependencies to include in the height.
+ if (HasPhysRegs)
+ Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
+ MTM.TII, MTM.TRI);
+
+ // Update the required height of any virtual registers read by MI.
+ for (const DataDep &Dep : Deps)
+ if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
+
+ InstrCycles &MICycles = Cycles[&MI];
+ MICycles.Height = Cycle;
+ if (!TBI.HasValidInstrDepths) {
+ DEBUG(dbgs() << Cycle << '\t' << MI);
+ continue;
+ }
+ // Update critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
+ }
+
+ // Update virtual live-in heights. They were added by addLiveIns() with a 0
+ // height because the final height isn't known until now.
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+ for (LiveInReg &LIR : TBI.LiveIns) {
+ const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+ LIR.Height = Heights.lookup(DefMI);
+ DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+ }
+
+ // Transfer the live regunits to the live-in list.
+ for (SparseSet<LiveRegUnit>::const_iterator
+ RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+ TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+ DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+ << '@' << RI->Cycle);
+ }
+ DEBUG(dbgs() << '\n');
+
+ if (!TBI.HasValidInstrDepths)
+ continue;
+ // Add live-ins to the critical path length.
+ TBI.CriticalPath = std::max(TBI.CriticalPath,
+ computeCrossBlockCriticalPath(TBI));
+ DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+ }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+
+ if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
+ computeTrace(MBB);
+ if (!TBI.HasValidInstrDepths)
+ computeInstrDepths(MBB);
+ if (!TBI.HasValidInstrHeights)
+ computeInstrHeights(MBB);
+
+ return Trace(*this, TBI);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
+ assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
+ "MI must be in the trace center block");
+ InstrCycles Cyc = getInstrCycles(MI);
+ return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
+ const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+ SmallVector<DataDep, 1> Deps;
+ getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+ assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+ DataDep &Dep = Deps.front();
+ unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
+ // Add latency if DefMI is a real instruction. Transients get latency 0.
+ if (!Dep.DefMI->isTransient())
+ DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+ &PHI, Dep.UseOp);
+ return DepCycle;
+}
+
+/// When Bottom is set, include instructions in the current block in the
+/// estimate.
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K]);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+  // All instructions before the current block...
+  unsigned Instrs = TBI.InstrDepth;
+  // ...plus the instructions in the current block when Bottom is set.
+ if (Bottom)
+ Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceLength(
+ ArrayRef<const MachineBasicBlock *> Extrablocks,
+ ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
+ ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+
+  // Compute the resource cycles (scaled by the resource factor) that a set of
+  // extra instructions contributes to processor resource ResourceIdx.
+ auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
+ unsigned ResourceIdx)
+ ->unsigned {
+ unsigned Cycles = 0;
+ for (const MCSchedClassDesc *SC : Instrs) {
+ if (!SC->isValid())
+ continue;
+ for (TargetSchedModel::ProcResIter
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != ResourceIdx)
+ continue;
+ Cycles +=
+ (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
+ }
+ }
+ return Cycles;
+ };
+
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
+ PRCycles += extraCycles(ExtraInstrs, K);
+ PRCycles -= extraCycles(RemoveInstrs, K);
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ // Instrs: #instructions in current trace outside current block.
+ unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+ // Add instruction count from the extra blocks.
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ Instrs += TE.MTM.getResources(MBB)->InstrCount;
+ Instrs += ExtraInstrs.size();
+ Instrs -= RemoveInstrs.size();
+ if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
+ Instrs /= IW;
+ // Assume issue width 1 without a schedule model.
+ return std::max(Instrs, PRMax);
+}
+
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
+ const MachineInstr &UseMI) const {
+ if (DefMI.getParent() == UseMI.getParent())
+ return true;
+
+ const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
+ const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];
+
+ return DepTBI.isUsefulDominator(TBI);
+}
+
+void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {
+ OS << getName() << " ensemble:\n";
+ for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
+ OS << " BB#" << i << '\t';
+ BlockInfo[i].print(OS);
+ OS << '\n';
+ }
+}
+
+void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const {
+ if (hasValidDepth()) {
+ OS << "depth=" << InstrDepth;
+ if (Pred)
+ OS << " pred=BB#" << Pred->getNumber();
+ else
+ OS << " pred=null";
+ OS << " head=BB#" << Head;
+ if (HasValidInstrDepths)
+ OS << " +instrs";
+ } else
+ OS << "depth invalid";
+ OS << ", ";
+ if (hasValidHeight()) {
+ OS << "height=" << InstrHeight;
+ if (Succ)
+ OS << " succ=BB#" << Succ->getNumber();
+ else
+ OS << " succ=null";
+ OS << " tail=BB#" << Tail;
+ if (HasValidInstrHeights)
+ OS << " +instrs";
+ } else
+ OS << "height invalid";
+ if (HasValidInstrDepths && HasValidInstrHeights)
+ OS << ", crit=" << CriticalPath;
+}
+
+void MachineTraceMetrics::Trace::print(raw_ostream &OS) const {
+ unsigned MBBNum = &TBI - &TE.BlockInfo[0];
+
+ OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum
+ << " --> BB#" << TBI.Tail << ':';
+ if (TBI.hasValidHeight() && TBI.hasValidDepth())
+ OS << ' ' << getInstrCount() << " instrs.";
+ if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights)
+ OS << ' ' << TBI.CriticalPath << " cycles.";
+
+ const MachineTraceMetrics::TraceBlockInfo *Block = &TBI;
+ OS << "\nBB#" << MBBNum;
+ while (Block->hasValidDepth() && Block->Pred) {
+ unsigned Num = Block->Pred->getNumber();
+ OS << " <- BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+
+ Block = &TBI;
+ OS << "\n ";
+ while (Block->hasValidHeight() && Block->Succ) {
+ unsigned Num = Block->Succ->getNumber();
+ OS << " -> BB#" << Num;
+ Block = &TE.BlockInfo[Num];
+ }
+ OS << '\n';
+}
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 000000000000..a70adb050455
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,2049 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
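+//
+// Illustrative invocations (the input file name is a placeholder):
+//
+//   llc -verify-machineinstrs input.ll
+//   LLVM_VERIFY_MACHINEINSTRS=verify.log llc input.ll
+//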
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+namespace {
+ struct MachineVerifier {
+
+ MachineVerifier(Pass *pass, const char *b) :
+ PASS(pass),
+ Banner(b)
+ {}
+
+ unsigned verify(MachineFunction &MF);
+
+ Pass *const PASS;
+ const char *Banner;
+ const MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+
+ unsigned foundErrors;
+
+ typedef SmallVector<unsigned, 16> RegVector;
+ typedef SmallVector<const uint32_t*, 4> RegMaskVector;
+ typedef DenseSet<unsigned> RegSet;
+ typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+ typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet;
+
+ const MachineInstr *FirstTerminator;
+ BlockSet FunctionBlocks;
+
+ BitVector regsReserved;
+ RegSet regsLive;
+ RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
+ RegSet regsLiveInButUnused;
+
+ SlotIndex lastIndex;
+
+ // Add Reg and any sub-registers to RV
+ void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ RV.push_back(Reg);
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ RV.push_back(*SubRegs);
+ }
+
+ struct BBInfo {
+ // Is this MBB reachable from the MF entry point?
+ bool reachable;
+
+ // Vregs that must be live in because they are used without being
+ // defined. Map value is the user.
+ RegMap vregsLiveIn;
+
+ // Regs killed in MBB. They may be defined again, and will then be in both
+ // regsKilled and regsLiveOut.
+ RegSet regsKilled;
+
+ // Regs defined in MBB and live out. Note that vregs passing through may
+ // be live out without being mentioned here.
+ RegSet regsLiveOut;
+
+ // Vregs that pass through MBB untouched. This set is disjoint from
+ // regsKilled and regsLiveOut.
+ RegSet vregsPassed;
+
+ // Vregs that must pass through MBB because they are needed by a successor
+ // block. This set is disjoint from regsLiveOut.
+ RegSet vregsRequired;
+
+ // Set versions of block's predecessor and successor lists.
+ BlockSet Preds, Succs;
+
+ BBInfo() : reachable(false) {}
+
+ // Add register to vregsPassed if it belongs there. Return true if
+ // anything changed.
+ bool addPassed(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+ return false;
+ return vregsPassed.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addPassed(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addPassed(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Add register to vregsRequired if it belongs there. Return true if
+ // anything changed.
+ bool addRequired(unsigned Reg) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (regsLiveOut.count(Reg))
+ return false;
+ return vregsRequired.insert(Reg).second;
+ }
+
+ // Same for a full set.
+ bool addRequired(const RegSet &RS) {
+ bool changed = false;
+ for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+ if (addRequired(*I))
+ changed = true;
+ return changed;
+ }
+
+ // Same for a full map.
+ bool addRequired(const RegMap &RM) {
+ bool changed = false;
+ for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+ if (addRequired(I->first))
+ changed = true;
+ return changed;
+ }
+
+ // Live-out registers are either in regsLiveOut or vregsPassed.
+ bool isLiveOut(unsigned Reg) const {
+ return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+ }
+ };
+
+ // Extra register info per MBB.
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+ bool isReserved(unsigned Reg) {
+ return Reg < regsReserved.size() && regsReserved.test(Reg);
+ }
+
+ bool isAllocatable(unsigned Reg) {
+ return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
+ }
+
+ // Analysis information if available
+ LiveVariables *LiveVars;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
+
+ void visitMachineFunctionBefore();
+ void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+ void visitMachineBundleBefore(const MachineInstr *MI);
+ void visitMachineInstrBefore(const MachineInstr *MI);
+ void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+ void visitMachineInstrAfter(const MachineInstr *MI);
+ void visitMachineBundleAfter(const MachineInstr *MI);
+ void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+ void visitMachineFunctionAfter();
+
+ template <typename T> void report(const char *msg, ilist_iterator<T> I) {
+ report(msg, &*I);
+ }
+ void report(const char *msg, const MachineFunction *MF);
+ void report(const char *msg, const MachineBasicBlock *MBB);
+ void report(const char *msg, const MachineInstr *MI);
+ void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+ void report_context(const LiveInterval &LI) const;
+ void report_context(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) const;
+ void report_context(const LiveRange::Segment &S) const;
+ void report_context(const VNInfo &VNI) const;
+ void report_context(SlotIndex Pos) const;
+ void report_context_liverange(const LiveRange &LR) const;
+ void report_context_lanemask(LaneBitmask LaneMask) const;
+ void report_context_vreg(unsigned VReg) const;
+ void report_context_vreg_regunit(unsigned VRegOrRegUnit) const;
+
+ void verifyInlineAsm(const MachineInstr *MI);
+
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
+ void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
+ SlotIndex UseIdx, const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask = 0);
+ void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
+ SlotIndex DefIdx, const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask = 0);
+
+ void markReachable(const MachineBasicBlock *MBB);
+ void calcRegsPassed();
+ void checkPHIOps(const MachineBasicBlock *MBB);
+
+ void calcRegsRequired();
+ void verifyLiveVariables();
+ void verifyLiveIntervals();
+ void verifyLiveInterval(const LiveInterval&);
+ void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned,
+ unsigned);
+ void verifyLiveRangeSegment(const LiveRange&,
+ const LiveRange::const_iterator I, unsigned,
+ unsigned);
+ void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0);
+
+ void verifyStackFrame();
+
+ void verifySlotIndexes() const;
+ void verifyProperties(const MachineFunction &MF);
+ };
+
+ struct MachineVerifierPass : public MachineFunctionPass {
+ static char ID; // Pass ID, replacement for typeid
+ const std::string Banner;
+
+    MachineVerifierPass(const std::string &banner = std::string())
+ : MachineFunctionPass(ID), Banner(banner) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
+ return false;
+ }
+ };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
+ return new MachineVerifierPass(Banner);
+}
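+
+// Illustrative use only (the pass manager and the preceding pass are
+// hypothetical); a pipeline could schedule the verifier after a pass it wants
+// to check, for example:
+//
+//   PM.add(createMachineVerifierPass("After MyCustomPass"));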
+
+bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
+ const {
+ MachineFunction &MF = const_cast<MachineFunction&>(*this);
+ unsigned FoundErrors = MachineVerifier(p, Banner).verify(MF);
+ if (AbortOnErrors && FoundErrors)
+ report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
+ return FoundErrors == 0;
+}
+
+void MachineVerifier::verifySlotIndexes() const {
+ if (Indexes == nullptr)
+ return;
+
+ // Ensure the IdxMBB list is sorted by slot indexes.
+ SlotIndex Last;
+ for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(),
+ E = Indexes->MBBIndexEnd(); I != E; ++I) {
+ assert(!Last.isValid() || I->first > Last);
+ Last = I->first;
+ }
+}
+
+void MachineVerifier::verifyProperties(const MachineFunction &MF) {
+ // If a pass has introduced virtual registers without clearing the
+ // AllVRegsAllocated property (or set it without allocating the vregs)
+ // then report an error.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::AllVRegsAllocated) &&
+ MRI->getNumVirtRegs()) {
+ report(
+ "Function has AllVRegsAllocated property but there are VReg operands",
+ &MF);
+ }
+}
+
+unsigned MachineVerifier::verify(MachineFunction &MF) {
+ foundErrors = 0;
+
+ this->MF = &MF;
+ TM = &MF.getTarget();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ LiveVars = nullptr;
+ LiveInts = nullptr;
+ LiveStks = nullptr;
+ Indexes = nullptr;
+ if (PASS) {
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+ }
+
+ verifySlotIndexes();
+
+ verifyProperties(MF);
+
+ visitMachineFunctionBefore();
+ for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+ MFI!=MFE; ++MFI) {
+ visitMachineBasicBlockBefore(&*MFI);
+ // Keep track of the current bundle header.
+ const MachineInstr *CurBundle = nullptr;
+ // Do we expect the next instruction to be part of the same bundle?
+ bool InBundle = false;
+
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != &*MFI) {
+ report("Bad instruction parent pointer", MFI);
+ errs() << "Instruction: " << *MBBI;
+ continue;
+ }
+
+ // Check for consistent bundle flags.
+ if (InBundle && !MBBI->isBundledWithPred())
+ report("Missing BundledPred flag, "
+ "BundledSucc was set on predecessor",
+ &*MBBI);
+ if (!InBundle && MBBI->isBundledWithPred())
+ report("BundledPred flag is set, "
+ "but BundledSucc not set on predecessor",
+ &*MBBI);
+
+ // Is this a bundle header?
+ if (!MBBI->isInsideBundle()) {
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ CurBundle = &*MBBI;
+ visitMachineBundleBefore(CurBundle);
+ } else if (!CurBundle)
+ report("No bundle header", MBBI);
+ visitMachineInstrBefore(&*MBBI);
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ const MachineInstr &MI = *MBBI;
+ const MachineOperand &Op = MI.getOperand(I);
+ if (Op.getParent() != &MI) {
+ // Make sure to use correct addOperand / RemoveOperand / ChangeTo
+ // functions when replacing operands of a MachineInstr.
+ report("Instruction has operand with wrong parent set", &MI);
+ }
+
+ visitMachineOperand(&Op, I);
+ }
+
+ visitMachineInstrAfter(&*MBBI);
+
+ // Was this the last bundled instruction?
+ InBundle = MBBI->isBundledWithSucc();
+ }
+ if (CurBundle)
+ visitMachineBundleAfter(CurBundle);
+ if (InBundle)
+ report("BundledSucc flag set on last instruction in block", &MFI->back());
+ visitMachineBasicBlockAfter(&*MFI);
+ }
+ visitMachineFunctionAfter();
+
+ // Clean up.
+ regsLive.clear();
+ regsDefined.clear();
+ regsDead.clear();
+ regsKilled.clear();
+ regMasks.clear();
+ regsLiveInButUnused.clear();
+ MBBInfoMap.clear();
+
+ return foundErrors;
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+ assert(MF);
+ errs() << '\n';
+ if (!foundErrors++) {
+ if (Banner)
+ errs() << "# " << Banner << '\n';
+ if (LiveInts != nullptr)
+ LiveInts->print(errs());
+ else
+ MF->print(errs(), Indexes);
+ }
+ errs() << "*** Bad machine code: " << msg << " ***\n"
+ << "- function: " << MF->getName() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+ assert(MBB);
+ report(msg, MBB->getParent());
+ errs() << "- basic block: BB#" << MBB->getNumber()
+ << ' ' << MBB->getName()
+ << " (" << (const void*)MBB << ')';
+ if (Indexes)
+ errs() << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ errs() << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+ assert(MI);
+ report(msg, MI->getParent());
+ errs() << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(*MI))
+ errs() << Indexes->getInstructionIndex(*MI) << '\t';
+ MI->print(errs(), /*SkipOpers=*/true);
+ errs() << '\n';
+}
+
+void MachineVerifier::report(const char *msg,
+ const MachineOperand *MO, unsigned MONum) {
+ assert(MO);
+ report(msg, MO->getParent());
+ errs() << "- operand " << MONum << ": ";
+ MO->print(errs(), TRI);
+ errs() << "\n";
+}
+
+void MachineVerifier::report_context(SlotIndex Pos) const {
+ errs() << "- at: " << Pos << '\n';
+}
+
+void MachineVerifier::report_context(const LiveInterval &LI) const {
+ errs() << "- interval: " << LI << '\n';
+}
+
+void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) const {
+ report_context_liverange(LR);
+ errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+}
+
+void MachineVerifier::report_context(const LiveRange::Segment &S) const {
+ errs() << "- segment: " << S << '\n';
+}
+
+void MachineVerifier::report_context(const VNInfo &VNI) const {
+ errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n";
+}
+
+void MachineVerifier::report_context_liverange(const LiveRange &LR) const {
+ errs() << "- liverange: " << LR << '\n';
+}
+
+void MachineVerifier::report_context_vreg(unsigned VReg) const {
+ errs() << "- v. register: " << PrintReg(VReg, TRI) << '\n';
+}
+
+void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
+ if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
+ report_context_vreg(VRegOrUnit);
+ } else {
+ errs() << "- regunit: " << PrintRegUnit(VRegOrUnit, TRI) << '\n';
+ }
+}
+
+void MachineVerifier::report_context_lanemask(LaneBitmask LaneMask) const {
+ errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n';
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ if (!MInfo.reachable) {
+ MInfo.reachable = true;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+ markReachable(*SuI);
+ }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
+ regsReserved = MRI->getReservedRegs();
+
+ // A sub-register of a reserved register is also reserved
+ for (int Reg = regsReserved.find_first(); Reg>=0;
+ Reg = regsReserved.find_next(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ // FIXME: This should probably be:
+ // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
+ regsReserved.set(*SubRegs);
+ }
+ }
+
+ markReachable(&MF->front());
+
+ // Build a set of the basic blocks in the function.
+ FunctionBlocks.clear();
+ for (const auto &MBB : *MF) {
+ FunctionBlocks.insert(&MBB);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ MInfo.Preds.insert(MBB.pred_begin(), MBB.pred_end());
+ if (MInfo.Preds.size() != MBB.pred_size())
+ report("MBB has duplicate entries in its predecessor list.", &MBB);
+
+ MInfo.Succs.insert(MBB.succ_begin(), MBB.succ_end());
+ if (MInfo.Succs.size() != MBB.succ_size())
+ report("MBB has duplicate entries in its successor list.", &MBB);
+ }
+
+ // Check that the register use lists are sane.
+ MRI->verifyUseLists();
+
+ verifyStackFrame();
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+ FirstTerminator = nullptr;
+
+ if (MRI->isSSA()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
+ for (const auto &LI : MBB->liveins()) {
+ if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
+ MBB->getIterator() != MBB->getParent()->begin()) {
+ report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ }
+ }
+ }
+
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isEHPad())
+ LandingPadSuccs.insert(*I);
+ if (!FunctionBlocks.count(*I))
+ report("MBB has successor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Preds.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ errs() << "MBB is not in the predecessor list of the successor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ // Check the predecessor list.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ errs() << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
+ }
+
+ const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+ const BasicBlock *BB = MBB->getBasicBlock();
+ const Function *Fn = MF->getFunction();
+ if (LandingPadSuccs.size() > 1 &&
+ !(AsmInfo &&
+ AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+ BB && isa<SwitchInst>(BB->getTerminator())) &&
+ !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn())))
+ report("MBB has more than one landing pad successor", MBB);
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions to
+  // check.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->analyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB,
+ Cond)) {
+ // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+ // check whether its answers match up with reality.
+ if (!TBB && !FBB) {
+ // Block falls through to its successor.
+ MachineFunction::const_iterator MBBI = MBB->getIterator();
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ // It's possible that the block legitimately ends with a noreturn
+ // call or an unreachable, in which case it won't actually fall
+ // out the bottom of the function.
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+ // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+ // out of the block.
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+ report("MBB exits via unconditional fall-through but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(&*MBBI)) {
+ report("MBB exits via unconditional fall-through but its successor "
+ "differs from its CFG successor!", MBB);
+ }
+ if (!MBB->empty() && MBB->back().isBarrier() &&
+ !TII->isPredicated(MBB->back())) {
+ report("MBB exits via unconditional fall-through but ends with a "
+ "barrier instruction!", MBB);
+ }
+ if (!Cond.empty()) {
+ report("MBB exits via unconditional fall-through but has a condition!",
+ MBB);
+ }
+ } else if (TBB && !FBB && Cond.empty()) {
+ // Block unconditionally branches somewhere.
+      // If the block has exactly one successor and that successor happens to
+      // be a landing pad, accept it as valid control flow.
+ if (MBB->succ_size() != 1+LandingPadSuccs.size() &&
+ (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 ||
+ *MBB->succ_begin() != *LandingPadSuccs.begin())) {
+ report("MBB exits via unconditional branch but doesn't have "
+ "exactly one CFG successor!", MBB);
+ } else if (!MBB->isSuccessor(TBB)) {
+ report("MBB exits via unconditional branch but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via unconditional branch but doesn't contain "
+ "any instructions!", MBB);
+ } else if (!MBB->back().isBarrier()) {
+ report("MBB exits via unconditional branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().isTerminator()) {
+ report("MBB exits via unconditional branch but the branch isn't a "
+ "terminator instruction!", MBB);
+ }
+ } else if (TBB && !FBB && !Cond.empty()) {
+ // Block conditionally branches somewhere, otherwise falls through.
+ MachineFunction::const_iterator MBBI = MBB->getIterator();
+ ++MBBI;
+ if (MBBI == MF->end()) {
+ report("MBB conditionally falls through out of function!", MBB);
+ } else if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/fall-through but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) {
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/fall-through but doesn't "
+ "contain any instructions!", MBB);
+ } else if (MBB->back().isBarrier()) {
+ report("MBB exits via conditional branch/fall-through but ends with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().isTerminator()) {
+ report("MBB exits via conditional branch/fall-through but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ } else if (TBB && FBB) {
+ // Block conditionally branches somewhere, otherwise branches
+ // somewhere else.
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor don't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
+ report("MBB exits via conditional branch/branch but doesn't have "
+ "exactly two CFG successors!", MBB);
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+ report("MBB exits via conditional branch/branch but the CFG "
+ "successors don't match the actual successors!", MBB);
+ }
+ if (MBB->empty()) {
+ report("MBB exits via conditional branch/branch but doesn't "
+ "contain any instructions!", MBB);
+ } else if (!MBB->back().isBarrier()) {
+ report("MBB exits via conditional branch/branch but doesn't end with a "
+ "barrier instruction!", MBB);
+ } else if (!MBB->back().isTerminator()) {
+ report("MBB exits via conditional branch/branch but the branch "
+ "isn't a terminator instruction!", MBB);
+ }
+ if (Cond.empty()) {
+ report("MBB exits via conditinal branch/branch but there's no "
+ "condition!", MBB);
+ }
+ } else {
+ report("AnalyzeBranch returned invalid data!", MBB);
+ }
+ }
+
+ regsLive.clear();
+ for (const auto &LI : MBB->liveins()) {
+ if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+ report("MBB live-in list contains non-physical register", MBB);
+ continue;
+ }
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
+ }
+ regsLiveInButUnused = regsLive;
+
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ assert(MFI && "Function has no frame info");
+ BitVector PR = MFI->getPristineRegs(*MF);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+ for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ regsLive.insert(*SubRegs);
+ }
+
+ regsKilled.clear();
+ regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+ if (Indexes && Indexes->hasIndex(*MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(*MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ errs() << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
+
+ // Ensure non-terminators don't follow terminators.
+ // Ignore predicated terminators formed by if conversion.
+ // FIXME: If conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(*MI)) {
+ if (!FirstTerminator)
+ FirstTerminator = MI;
+ } else if (FirstTerminator) {
+ report("Non-terminator instruction after the first terminator", MI);
+ errs() << "First terminator was:\t" << *FirstTerminator;
+ }
+}
+
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
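+//
+// Sketch of the expected operand layout (values are illustrative only):
+//
+//   op0: external symbol holding the asm string
+//   op1: immediate carrying the extra-info flags
+//   op2: immediate group descriptor, followed by the registers it declares;
+//        further groups repeat this pattern
+//   ...then an optional metadata operand and trailing implicit registers.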
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+ // The first two operands on INLINEASM are the asm string and global flags.
+ if (MI->getNumOperands() < 2) {
+ report("Too few operands on inline asm", MI);
+ return;
+ }
+ if (!MI->getOperand(0).isSymbol())
+ report("Asm string must be an external symbol", MI);
+ if (!MI->getOperand(1).isImm())
+ report("Asm flags must be an immediate", MI);
+  // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
+  // Extra_AsmDialect = 4, Extra_MayLoad = 8, Extra_MayStore = 16, and
+  // Extra_IsConvergent = 32.
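+  // For example (illustrative), an asm with side effects that may load memory
+  // would carry 1 | 8 = 9; any combination of the above fits in 6 bits, which
+  // is what the isUInt<6> check below enforces.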
+ if (!isUInt<6>(MI->getOperand(1).getImm()))
+ report("Unknown asm flags", &MI->getOperand(1), 1);
+
+ static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
+
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ unsigned NumOps;
+ for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // There may be implicit ops after the fixed operands.
+ if (!MO.isImm())
+ break;
+ NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm());
+ }
+
+ if (OpNo > MI->getNumOperands())
+ report("Missing operands in last group", MI);
+
+ // An optional MDNode follows the groups.
+ if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata())
+ ++OpNo;
+
+ // All trailing operands must be implicit registers.
+ for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isReg() || !MO.isImplicit())
+ report("Expected implicit register after groups", &MO, OpNo);
+ }
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ errs() << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumOperands() << " given.\n";
+ }
+
+ // Check the tied operands.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end(); I != E; ++I) {
+ if ((*I)->isLoad() && !MI->mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MI->mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one, unless they are inside a bundle.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(*MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(*MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumDefs = MCID.getNumDefs();
+ if (MCID.getOpcode() == TargetOpcode::PATCHPOINT)
+ NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0;
+
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < NumDefs) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ if (!MO->isReg())
+ report("Explicit definition must be a register", MO, MONum);
+ else if (!MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit definition marked as use", MO, MONum);
+ else if (MO->isImplicit())
+ report("Explicit definition marked as implicit", MO, MONum);
+ } else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
+ // Don't check if it's the last operand in a variadic instruction. See,
+    // e.g., LDM_RET in the ARM backend.
+ if (MO->isReg() &&
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+
+ int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
+ if (TiedTo != -1) {
+ if (!MO->isReg())
+ report("Tied use must be a register", MO, MONum);
+ else if (!MO->isTied())
+ report("Operand should be tied", MO, MONum);
+ else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
+ report("Tied def doesn't match MCInstrDesc", MO, MONum);
+ } else if (MO->isReg() && MO->isTied())
+ report("Explicit operand should not be tied", MO, MONum);
+ } else {
+ // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
+ report("Extra explicit operand on non-variadic instruction", MO, MONum);
+ }
+
+ switch (MO->getType()) {
+ case MachineOperand::MO_Register: {
+ const unsigned Reg = MO->getReg();
+ if (!Reg)
+ return;
+ if (MRI->tracksLiveness() && !MI->isDebugValue())
+ checkLiveness(MO, MONum);
+
+ // Verify the consistency of tied operands.
+ if (MO->isTied()) {
+ unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
+ const MachineOperand &OtherMO = MI->getOperand(OtherIdx);
+ if (!OtherMO.isReg())
+ report("Must be tied to a register", MO, MONum);
+ if (!OtherMO.isTied())
+ report("Missing tie flags on tied operand", MO, MONum);
+ if (MI->findTiedOperandIdx(OtherIdx) != MONum)
+ report("Inconsistent tie links", MO, MONum);
+ if (MONum < MCID.getNumDefs()) {
+ if (OtherIdx < MCID.getNumOperands()) {
+ if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO))
+ report("Explicit def tied to explicit use without tie constraint",
+ MO, MONum);
+ } else {
+ if (!OtherMO.isImplicit())
+ report("Explicit def should be tied to implicit use", MO, MONum);
+ }
+ }
+ }
+
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
+
+ // Check register classes.
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
+ unsigned SubIdx = MO->getSubReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (SubIdx) {
+ report("Illegal subregister index for physical register", MO, MONum);
+ return;
+ }
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (!DRC->contains(Reg)) {
+ report("Illegal physical register for instruction", MO, MONum);
+ errs() << TRI->getName(Reg) << " is not a "
+ << TRI->getRegClassName(DRC) << " register.\n";
+ }
+ }
+ } else {
+ // Virtual register.
+ const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
+ if (!RC) {
+ // This is a generic virtual register.
+ // It must have a size and it must not have a SubIdx.
+ unsigned Size = MRI->getSize(Reg);
+ if (!Size) {
+ report("Generic virtual register must have a size", MO, MONum);
+ return;
+ }
+ // Make sure the register fits into its register bank if any.
+ const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ if (RegBank && RegBank->getSize() < Size) {
+ report("Register bank is too small for virtual register", MO,
+ MONum);
+ errs() << "Register bank " << RegBank->getName() << " too small("
+ << RegBank->getSize() << ") to fit " << Size << "-bits\n";
+ return;
+ }
+ if (SubIdx) {
+ report("Generic virtual register does not subregister index", MO, MONum);
+ return;
+ }
+ break;
+ }
+ if (SubIdx) {
+ const TargetRegisterClass *SRC =
+ TRI->getSubClassWithSubReg(RC, SubIdx);
+ if (!SRC) {
+ report("Invalid subregister index for virtual register", MO, MONum);
+ errs() << "Register class " << TRI->getRegClassName(RC)
+ << " does not support subreg index " << SubIdx << "\n";
+ return;
+ }
+ if (RC != SRC) {
+ report("Invalid register class for subregister index", MO, MONum);
+ errs() << "Register class " << TRI->getRegClassName(RC)
+ << " does not fully support subreg index " << SubIdx << "\n";
+ return;
+ }
+ }
+ if (const TargetRegisterClass *DRC =
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ if (SubIdx) {
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(RC, *MF);
+ if (!SuperRC) {
+ report("No largest legal super class exists.", MO, MONum);
+ return;
+ }
+ DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx);
+ if (!DRC) {
+ report("No matching super-reg register class.", MO, MONum);
+ return;
+ }
+ }
+ if (!RC->hasSuperClassEq(DRC)) {
+ report("Illegal virtual register for instruction", MO, MONum);
+ errs() << "Expected a " << TRI->getRegClassName(DRC)
+ << " register, but got a " << TRI->getRegClassName(RC)
+ << " register\n";
+ }
+ }
+ }
+ }
+ break;
+ }
+
+ case MachineOperand::MO_RegisterMask:
+ regMasks.push_back(MO->getRegMask());
+ break;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+ report("PHI operand is not in the CFG", MO, MONum);
+ break;
+
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ int FI = MO->getIndex();
+ LiveInterval &LI = LiveStks->getInterval(FI);
+ SlotIndex Idx = LiveInts->getInstructionIndex(*MI);
+
+ bool stores = MI->mayStore();
+ bool loads = MI->mayLoad();
+ // For a memory-to-memory move, we need to check if the frame
+ // index is used for storing or loading, by inspecting the
+ // memory operands.
+ if (stores && loads) {
+ for (auto *MMO : MI->memoperands()) {
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+ if (PSV == nullptr) continue;
+ const FixedStackPseudoSourceValue *Value =
+ dyn_cast<FixedStackPseudoSourceValue>(PSV);
+ if (Value == nullptr) continue;
+ if (Value->getFrameIndex() != FI) continue;
+
+ if (MMO->isStore())
+ loads = false;
+ else
+ stores = false;
+ break;
+ }
+ if (loads == stores)
+ report("Missing fixed stack memoperand.", MI);
+ }
+ if (loads && !LI.liveAt(Idx.getRegSlot(true))) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ errs() << "Live stack: " << LI << '\n';
+ }
+ if (stores && !LI.liveAt(Idx.getRegSlot())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ errs() << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
+ unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ LaneBitmask LaneMask) {
+ LiveQueryResult LRQ = LR.Query(UseIdx);
+  // Check if we have a segment at the use. Note, however, that we only need
+  // one live subregister range; the others may be dead.
+ if (!LRQ.valueIn() && LaneMask == 0) {
+ report("No live segment at use", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ report_context(UseIdx);
+ }
+ if (MO->isKill() && !LRQ.isKill()) {
+ report("Live range continues after kill flag", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ report_context(UseIdx);
+ }
+}
+
+void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
+ unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ LaneBitmask LaneMask) {
+ if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx) {
+ report("Inconsistent valno->def", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ report_context(*VNI);
+ report_context(DefIdx);
+ }
+ } else {
+ report("No live segment at def", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ report_context(DefIdx);
+ }
+ // Check that, if the dead def flag is present, LiveInts agree.
+ if (MO->isDead()) {
+ LiveQueryResult LRQ = LR.Query(DefIdx);
+ if (!LRQ.isDeadDef()) {
+ // In case of physregs we can have a non-dead definition on another
+ // operand.
+ bool otherDef = false;
+ if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
+ const MachineInstr &MI = *MO->getParent();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (*Units == VRegOrUnit) {
+ otherDef = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!otherDef) {
+ report("Live range continues after dead def flag", MO, MONum);
+ report_context_liverange(LR);
+ report_context_vreg_regunit(VRegOrUnit);
+ if (LaneMask != 0)
+ report_context_lanemask(LaneMask);
+ }
+ }
+ }
+}
+
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const unsigned Reg = MO->getReg();
+
+ // Both use and def operands can read a register.
+ if (MO->readsReg()) {
+ regsLiveInButUnused.erase(Reg);
+
+ if (MO->isKill())
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
+ // Check the cached regunit intervals.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units))
+ checkLivenessAtUse(MO, MONum, UseIdx, *LR, *Units);
+ }
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ // This is a virtual register interval.
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
+
+ if (LI.hasSubRanges() && !MO->isDef()) {
+ unsigned SubRegIdx = MO->getSubReg();
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask = 0;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((MOMask & SR.LaneMask) == 0)
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+            // At least part of the register has to be live at the use.
+ if ((LiveInMask & MOMask) == 0) {
+ report("No live subrange at use", MO, MONum);
+ report_context(LI);
+ report_context(UseIdx);
+ }
+ }
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ bool Bad = !isReserved(Reg);
+ // We are fine if just any subregister has a defined value.
+ if (Bad) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid();
+ ++SubRegs) {
+ if (regsLive.count(*SubRegs)) {
+ Bad = false;
+ break;
+ }
+ }
+ }
+        // If there is an additional implicit-use of a super register we stop
+        // here. By definition we are fine if the super register is not
+        // (completely) dead; if the complete super register is dead we will
+        // get a report for its operand.
+ if (Bad) {
+ for (const MachineOperand &MOP : MI->uses()) {
+ if (!MOP.isReg())
+ continue;
+ if (!MOP.isImplicit())
+ continue;
+ for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
+ ++SubRegs) {
+ if (*SubRegs == Reg) {
+ Bad = false;
+ break;
+ }
+ }
+ }
+ }
+ if (Bad)
+ report("Using an undefined physical register", MO, MONum);
+ } else if (MRI->def_empty(Reg)) {
+ report("Reading virtual register without a def", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ std::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live segment, but only for virtual registers.
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
+ DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
+
+ if (LI.hasSubRanges()) {
+ unsigned SubRegIdx = MO->getSubReg();
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & MOMask) == 0)
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
+ }
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+}
+
+// This function gets called after visiting all instructions in a bundle. The
+// argument points to the bundle header.
+// Normal stand-alone instructions are also considered 'bundles', and this
+// function is called for all of them.
+void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ set_union(MInfo.regsKilled, regsKilled);
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ MachineOperand::clobbersPhysReg(Mask, *I))
+ regsDead.push_back(*I);
+ }
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+ MBBInfoMap[MBB].regsLiveOut = regsLive;
+ regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ errs() << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers that
+// can pass through an MBB live, but may not be live every time. It is assumed
+// that all vregsPassed sets are empty before the call.
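+//
+// A small illustrative example (register and block names are hypothetical):
+// if %v is in regsLiveOut of BB0 and the CFG is BB0 -> BB1 -> BB2, the first
+// loop adds %v to BB1's vregsPassed (assuming BB1 neither kills nor redefines
+// it), and the worklist loop then propagates it into BB2's vregsPassed, and
+// so on until no set changes.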
+void MachineVerifier::calcRegsPassed() {
+ // First push live-out regs to successors' vregsPassed. Remember the MBBs that
+ // have any vregsPassed.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ if (!MInfo.reachable)
+ continue;
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+ SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.regsLiveOut))
+ todo.insert(*SuI);
+ }
+ }
+
+ // Iteratively push vregsPassed to successors. This will converge to the same
+ // final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+ SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+ if (*SuI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*SuI];
+ if (SInfo.addPassed(MInfo.vregsPassed))
+ todo.insert(*SuI);
+ }
+ }
+}
+
+// Calculate the set of virtual registers that must be passed through each basic
+// block in order to satisfy the requirements of successor blocks. This is very
+// similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+ // First push live-in regs to predecessors' vregsRequired.
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+ PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+ BBInfo &PInfo = MBBInfoMap[*PrI];
+ if (PInfo.addRequired(MInfo.vregsLiveIn))
+ todo.insert(*PrI);
+ }
+ }
+
+ // Iteratively push vregsRequired to predecessors. This will converge to the
+ // same final state regardless of DenseSet iteration order.
+ while (!todo.empty()) {
+ const MachineBasicBlock *MBB = *todo.begin();
+ todo.erase(MBB);
+ BBInfo &MInfo = MBBInfoMap[MBB];
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (*PrI == MBB)
+ continue;
+ BBInfo &SInfo = MBBInfoMap[*PrI];
+ if (SInfo.addRequired(MInfo.vregsRequired))
+ todo.insert(*PrI);
+ }
+ }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
+ for (const auto &BBI : *MBB) {
+ if (!BBI.isPHI())
+ break;
+ seen.clear();
+
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI.getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB();
+ if (!Pre->isSuccessor(MBB))
+ continue;
+ seen.insert(Pre);
+ BBInfo &PrInfo = MBBInfoMap[Pre];
+ if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+ report("PHI operand is not live-out from predecessor",
+ &BBI.getOperand(i), i);
+ }
+
+ // Did we see all predecessors?
+ for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+ PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+ if (!seen.count(*PrI)) {
+ report("Missing PHI operand", &BBI);
+ errs() << "BB#" << (*PrI)->getNumber()
+ << " is a predecessor according to the CFG.\n";
+ }
+ }
+ }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+ calcRegsPassed();
+
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ // Skip unreachable MBBs.
+ if (!MInfo.reachable)
+ continue;
+
+ checkPHIOps(&MBB);
+ }
+
+ // Now check liveness info if available
+ calcRegsRequired();
+
+ // Check for killed virtual registers that should be live out.
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ if (MInfo.regsKilled.count(*I)) {
+ report("Virtual register killed in block, but needed live out.", &MBB);
+ errs() << "Virtual register " << PrintReg(*I)
+ << " is used after the block.\n";
+ }
+ }
+
+ if (!MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I) {
+ report("Virtual register defs don't dominate all uses.", MF);
+ report_context_vreg(*I);
+ }
+ }
+
+ if (LiveVars)
+ verifyLiveVariables();
+ if (LiveInts)
+ verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+ assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+ if (MInfo.vregsRequired.count(Reg)) {
+ if (!VI.AliveBlocks.test(MBB.getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", &MBB);
+ errs() << "Virtual register " << PrintReg(Reg)
+ << " must be live through the block.\n";
+ }
+ } else {
+ if (VI.AliveBlocks.test(MBB.getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", &MBB);
+ errs() << "Virtual register " << PrintReg(Reg)
+ << " is not needed live through the block.\n";
+ }
+ }
+ }
+ }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+
+ if (!LiveInts->hasInterval(Reg)) {
+ report("Missing live interval for virtual register", MF);
+ errs() << PrintReg(Reg, TRI) << " still has defs or uses\n";
+ continue;
+ }
+
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ verifyLiveInterval(LI);
+ }
+
+ // Verify all the cached regunit intervals.
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i)
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(i))
+ verifyLiveRange(*LR, i);
+}
+
+void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
+ const VNInfo *VNI, unsigned Reg,
+ LaneBitmask LaneMask) {
+ if (VNI->isUnused())
+ return;
+
+ const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def);
+
+ if (!DefVNI) {
+ report("Value not live at VNInfo def and not marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ if (DefVNI != VNI) {
+ report("Live segment at def has different VNInfo", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid VNInfo definition index", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef VNInfo is not defined at MBB start", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+ return;
+ }
+
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at VNInfo def index", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ return;
+ }
+
+ if (Reg != 0) {
+ bool hasDef = false;
+ bool isEarlyClobber = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MOI->getReg() != Reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->hasRegUnit(MOI->getReg(), Reg))
+ continue;
+ }
+ if (LaneMask != 0 &&
+ (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0)
+ continue;
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ }
+ }
+}
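The def-position checks above lean on the SlotIndex convention that each instruction has Block (B), EarlyClobber (e), Register (r) and Dead (d) slots. A hedged, stand-alone restatement of the rule being enforced, using a hypothetical enum rather than the real SlotIndex API:

    enum class Slot { Block, EarlyClobber, Register, Dead };

    // Mirrors verifyLiveRangeValue: PHI values must be defined at the block
    // start, early-clobber defs at the e slot, and ordinary defs at the r slot.
    bool defSlotIsValid(bool IsPHIDef, bool IsEarlyClobberDef, Slot DefSlot) {
      if (IsPHIDef)
        return DefSlot == Slot::Block;
      if (IsEarlyClobberDef)
        return DefSlot == Slot::EarlyClobber;
      return DefSlot == Slot::Register;
    }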
+
+void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
+ const LiveRange::const_iterator I,
+ unsigned Reg, LaneBitmask LaneMask)
+{
+ const LiveRange::Segment &S = *I;
+ const VNInfo *VNI = S.valno;
+ assert(VNI && "Live segment has no valno");
+
+ if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) {
+ report("Foreign valno in live segment", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ report_context(*VNI);
+ }
+
+ if (VNI->isUnused()) {
+ report("Live segment valno is marked unused", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ return;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (S.start != MBBStartIdx && S.start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(S.end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ return;
+ }
+
+ // No more checks for live-out segments.
+ if (S.end == LiveInts->getMBBEndIdx(EndMBB))
+ return;
+
+ // RegUnit intervals are allowed to contain dead PHI-defs.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() &&
+ S.start == VNI->def && S.end == VNI->def.getDeadSlot())
+ return;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ return;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (S.end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+
+ if (S.end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(S.start, S.end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (S.end.isEarlyClobber()) {
+ if (I+1 == LR.end() || (I+1)->start != S.end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // A live segment can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasSubRegDef = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != Reg)
+ continue;
+ if (LaneMask != 0 &&
+ (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0)
+ continue;
+ if (MOI->isDef()) {
+ if (MOI->getSubReg() != 0)
+ hasSubRegDef = true;
+ if (MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (MOI->readsReg())
+ hasRead = true;
+ }
+ if (S.end.isDead()) {
+ // Make sure that the corresponding machine operand for a "dead" live
+ // range has the dead flag. We cannot perform this check for subregister
+ // live ranges as partially dead values are allowed.
+ if (LaneMask == 0 && !hasDeadDef) {
+ report("Instruction ending live segment on dead slot has no dead flag",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ } else {
+ if (!hasRead) {
+ // When tracking subregister liveness, the main range must start new
+ // values on partial register writes, even if there is no read.
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 ||
+ !hasSubRegDef) {
+ report("Instruction ending live segment doesn't read the register",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB->getIterator();
+ // Is this live segment the beginning of a non-PHIDef VN?
+ if (S.start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ return;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LR, &*MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MFI->isEHPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+
+ // Is VNI a PHI-def in the current block?
+ bool IsPHI = VNI->isPHIDef() &&
+ VNI->def == LiveInts->getMBBStartIdx(&*MFI);
+
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
+
+ // All predecessors must have a live-out value if this is not a
+ // subregister liverange.
+ if (!PVNI && LaneMask == 0) {
+ report("Register not marked live out of predecessor", *PI);
+ report_context(LR, Reg, LaneMask);
+ report_context(*VNI);
+ errs() << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before "
+ << PEnd << '\n';
+ continue;
+ }
+
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI);
+ report_context(LR, Reg, LaneMask);
+ errs() << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id
+ << " live into BB#" << MFI->getNumber() << '@'
+ << LiveInts->getMBBStartIdx(&*MFI) << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
+
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
+ LaneBitmask LaneMask) {
+ for (const VNInfo *VNI : LR.valnos)
+ verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
+
+ for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I)
+ verifyLiveRangeSegment(LR, I, Reg, LaneMask);
+}
+
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ unsigned Reg = LI.reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ verifyLiveRange(LI, Reg);
+
+ LaneBitmask Mask = 0;
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask) != 0) {
+ report("Lane masks of sub ranges overlap in live interval", MF);
+ report_context(LI);
+ }
+ if ((SR.LaneMask & ~MaxMask) != 0) {
+ report("Subrange lanemask is invalid", MF);
+ report_context(LI);
+ }
+ if (SR.empty()) {
+ report("Subrange must not be empty", MF);
+ report_context(SR, LI.reg, SR.LaneMask);
+ }
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ if (!LI.covers(SR)) {
+ report("A Subrange is not covered by the main range", MF);
+ report_context(LI);
+ }
+ }
+
+ // Check the LI only has one connected component.
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ report_context(LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ errs() << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ errs() << ' ' << (*I)->id;
+ errs() << '\n';
+ }
+ }
+}
+
+namespace {
+ // FrameSetup and FrameDestroy can have zero adjustment, so a single integer
+ // can't tell whether a zero value came from a FrameSetup or a FrameDestroy.
+ // We therefore use a bool plus an integer to capture the stack state.
+ struct StackStateOfBB {
+ StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false),
+ ExitIsSetup(false) { }
+ StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) :
+ EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup),
+ ExitIsSetup(ExitSetup) { }
+ // Can be negative, which means we are setting up a frame.
+ int EntryValue;
+ int ExitValue;
+ bool EntryIsSetup;
+ bool ExitIsSetup;
+ };
+}
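As a concrete, stand-alone trace of the (int, bool) encoding: a block that sets up and tears down a 16-byte call frame leaves its exit state equal to its entry state. The opcode names in the comments are illustrative only.

    #include <cstdio>

    int main() {
      int ExitValue = 0;           // entry state inherited from the DFS parent
      bool ExitIsSetup = false;

      // FrameSetup <16>
      ExitValue -= 16;             // ExitValue becomes -16 (frame being set up)
      ExitIsSetup = true;

      // ... the call itself ...

      // FrameDestroy <16>
      ExitValue += 16;             // back to 0
      ExitIsSetup = false;

      std::printf("exit state: (%d, %d)\n", ExitValue, (int)ExitIsSetup); // (0, 0)
    }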
+
+/// Make sure that on every path through the CFG, a FrameSetup <n> is always
+/// followed by a FrameDestroy <n>, that stack adjustments are identical on all
+/// CFG edges to a merge point, and that the frame is destroyed at the end of a
+/// return block.
+void MachineVerifier::verifyStackFrame() {
+ unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
+
+ SmallVector<StackStateOfBB, 8> SPState;
+ SPState.resize(MF->getNumBlockIDs());
+ SmallPtrSet<const MachineBasicBlock*, 8> Reachable;
+
+ // Visit the MBBs in DFS order.
+ for (df_ext_iterator<const MachineFunction*,
+ SmallPtrSet<const MachineBasicBlock*, 8> >
+ DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
+ DFI != DFE; ++DFI) {
+ const MachineBasicBlock *MBB = *DFI;
+
+ StackStateOfBB BBState;
+ // Check the exit state of the DFS stack predecessor.
+ if (DFI.getPathLength() >= 2) {
+ const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+ assert(Reachable.count(StackPred) &&
+ "DFS stack predecessor is already visited.\n");
+ BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue;
+ BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup;
+ BBState.ExitValue = BBState.EntryValue;
+ BBState.ExitIsSetup = BBState.EntryIsSetup;
+ }
+
+ // Update stack state by checking contents of MBB.
+ for (const auto &I : *MBB) {
+ if (I.getOpcode() == FrameSetupOpcode) {
+ // The first operand of a FrameOpcode should be i32.
+ int Size = I.getOperand(0).getImm();
+ assert(Size >= 0 &&
+ "Value should be non-negative in FrameSetup and FrameDestroy.\n");
+
+ if (BBState.ExitIsSetup)
+ report("FrameSetup is after another FrameSetup", &I);
+ BBState.ExitValue -= Size;
+ BBState.ExitIsSetup = true;
+ }
+
+ if (I.getOpcode() == FrameDestroyOpcode) {
+ // The first operand of a FrameOpcode should be i32.
+ int Size = I.getOperand(0).getImm();
+ assert(Size >= 0 &&
+ "Value should be non-negative in FrameSetup and FrameDestroy.\n");
+
+ if (!BBState.ExitIsSetup)
+ report("FrameDestroy is not after a FrameSetup", &I);
+ int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
+ BBState.ExitValue;
+ if (BBState.ExitIsSetup && AbsSPAdj != Size) {
+ report("FrameDestroy <n> is after FrameSetup <m>", &I);
+ errs() << "FrameDestroy <" << Size << "> is after FrameSetup <"
+ << AbsSPAdj << ">.\n";
+ }
+ BBState.ExitValue += Size;
+ BBState.ExitIsSetup = false;
+ }
+ }
+ SPState[MBB->getNumber()] = BBState;
+
+ // Make sure the exit state of any predecessor is consistent with the entry
+ // state.
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (Reachable.count(*I) &&
+ (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue ||
+ SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) {
+ report("The exit stack state of a predecessor is inconsistent.", MBB);
+ errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state ("
+ << SPState[(*I)->getNumber()].ExitValue << ", "
+ << SPState[(*I)->getNumber()].ExitIsSetup
+ << "), while BB#" << MBB->getNumber() << " has entry state ("
+ << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n";
+ }
+ }
+
+ // Make sure the entry state of any successor is consistent with the exit
+ // state.
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if (Reachable.count(*I) &&
+ (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue ||
+ SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) {
+ report("The entry stack state of a successor is inconsistent.", MBB);
+ errs() << "Successor BB#" << (*I)->getNumber() << " has entry state ("
+ << SPState[(*I)->getNumber()].EntryValue << ", "
+ << SPState[(*I)->getNumber()].EntryIsSetup
+ << "), while BB#" << MBB->getNumber() << " has exit state ("
+ << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n";
+ }
+ }
+
+ // Make sure a basic block with return ends with zero stack adjustment.
+ if (!MBB->empty() && MBB->back().isReturn()) {
+ if (BBState.ExitIsSetup)
+ report("A return block ends with a FrameSetup.", MBB);
+ if (BBState.ExitValue)
+ report("A return block ends with a nonzero stack adjustment.", MBB);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 000000000000..0177e414f8d9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,196 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "phi-opt"
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+ class OptimizePHIs : public MachineFunctionPass {
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+ typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+ bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+ InstrSet &PHIsInCycle);
+ bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+ bool OptimizeBB(MachineBasicBlock &MBB);
+ };
+}
+
+char OptimizePHIs::ID = 0;
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false)
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
+ MRI = &Fn.getRegInfo();
+ TII = Fn.getSubtarget().getInstrInfo();
+
+ // Find dead PHI cycles and PHI cycles that can be replaced by a single
+ // value. InstCombine does these optimizations, but DAG legalization may
+ // introduce new opportunities, e.g., when i64 values are split up for
+ // 32-bit targets.
+ bool Changed = false;
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= OptimizeBB(*I);
+
+ return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
+ unsigned &SingleValReg,
+ InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI).second)
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ // Scan the PHI operands.
+ for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (SrcReg == DstReg)
+ continue;
+ MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+
+ // Skip over register-to-register moves.
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
+ if (!SrcMI)
+ return false;
+
+ if (SrcMI->isPHI()) {
+ if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
+ return false;
+ } else {
+ // Fail if there is more than one non-phi/non-move register.
+ if (SingleValReg != 0)
+ return false;
+ SingleValReg = SrcReg;
+ }
+ }
+ return true;
+}
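A stand-alone toy model of the same cycle test, with the copy-chasing omitted: a PHI whose sources are only other PHIs in the cycle plus at most one outside register collapses to that register. The containers and names are hypothetical, not the pass's.

    #include <map>
    #include <set>
    #include <vector>

    // Each PHI is keyed by its destination register and lists its source regs.
    using PHIMap = std::map<unsigned, std::vector<unsigned>>;

    bool isSingleValueCycle(const PHIMap &PHIs, unsigned Dst, unsigned &SingleVal,
                            std::set<unsigned> &Visited) {
      if (!Visited.insert(Dst).second)
        return true;                    // this PHI was already scanned
      for (unsigned Src : PHIs.at(Dst)) {
        if (Src == Dst)
          continue;
        if (PHIs.count(Src)) {          // source is another PHI: recurse
          if (!isSingleValueCycle(PHIs, Src, SingleVal, Visited))
            return false;
        } else if (SingleVal && SingleVal != Src) {
          return false;                 // a second distinct non-PHI value
        } else {
          SingleVal = Src;
        }
      }
      return true;
    }

    int main() {
      // %2 = PHI %1, %3   and   %3 = PHI %2, %1   both reduce to %1.
      PHIMap PHIs = {{2, {1, 3}}, {3, {2, 1}}};
      unsigned SingleVal = 0;
      std::set<unsigned> Visited;
      return isSingleValueCycle(PHIs, 2, SingleVal, Visited) ? 0 : 1;
    }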
+
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+ assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+ unsigned DstReg = MI->getOperand(0).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "PHI destination is not a virtual register");
+
+ // See if we already saw this register.
+ if (!PHIsInCycle.insert(MI).second)
+ return true;
+
+ // Don't scan crazily complex things.
+ if (PHIsInCycle.size() == 16)
+ return false;
+
+ for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) {
+ if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle))
+ return false;
+ }
+
+ return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator
+ MII = MBB.begin(), E = MBB.end(); MII != E; ) {
+ MachineInstr *MI = &*MII++;
+ if (!MI->isPHI())
+ break;
+
+ // Check for single-value PHI cycles.
+ unsigned SingleValReg = 0;
+ InstrSet PHIsInCycle;
+ if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
+ SingleValReg != 0) {
+ unsigned OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
+ MI->eraseFromParent();
+ ++NumPHICycles;
+ Changed = true;
+ continue;
+ }
+
+ // Check for dead PHI cycles.
+ PHIsInCycle.clear();
+ if (IsDeadPHICycle(MI, PHIsInCycle)) {
+ for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+ PI != PE; ++PI) {
+ MachineInstr *PhiMI = *PI;
+ if (&*MII == PhiMI)
+ ++MII;
+ PhiMI->eraseFromParent();
+ }
+ ++NumDeadPHICycles;
+ Changed = true;
+ }
+ }
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
new file mode 100644
index 000000000000..b8d54315d148
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -0,0 +1,652 @@
+//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "phielim"
+
+static cl::opt<bool>
+DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
+ cl::Hidden, cl::desc("Disable critical edge splitting "
+ "during PHI elimination"));
+
+static cl::opt<bool>
+SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
+ cl::Hidden, cl::desc("Split all critical edges during "
+ "PHI elimination"));
+
+static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
+ "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden,
+ cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
+
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+ LiveVariables *LV;
+ LiveIntervals *LIS;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator LastPHIIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in the
+ /// function. In particular, for each virtual register used in a PHI node we
+ /// count the uses coming from each predecessor BB. This is used later to
+ /// determine when the vreg is killed in that BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI);
+
+ // These functions are temporary abstractions around LiveVariables and
+ // LiveIntervals, so they can go away when LiveVariables does.
+ bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
+STATISTIC(NumLowered, "Number of phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addUsedIfAvailable<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+
+ bool Changed = false;
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ // Split critical edges to help the coalescer. This does not yet support
+ // updating LiveIntervals, so we disable it.
+ if (!DisableEdgeSplitting && (LV || LIS)) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ for (auto &MBB : MF)
+ Changed |= SplitPHIEdges(MF, MBB, MLI);
+ }
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(MF);
+
+ // Eliminate PHI instructions by inserting copies into predecessor blocks.
+ for (auto &MBB : MF)
+ Changed |= EliminatePHINodes(MF, MBB);
+
+ // Remove dead IMPLICIT_DEF instructions.
+ for (MachineInstr *DefMI : ImpDefs) {
+ unsigned DefReg = DefMI->getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(DefReg)) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*DefMI);
+ DefMI->eraseFromParent();
+ }
+ }
+
+ // Clean up the lowered PHI instructions.
+ for (auto &I : LoweredPHIs) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*I.first);
+ MF.DeleteMachineInstr(I.first);
+ }
+
+ LoweredPHIs.clear();
+ ImpDefs.clear();
+ VRegPHIUseCount.clear();
+
+ return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || !MBB.front().isPHI())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ // Get an iterator to the last of the leading PHI nodes and labels;
+ // SkipPHIsAndLabels returns the first instruction after them (possibly end()).
+ MachineBasicBlock::iterator LastPHIIt =
+ std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+
+ while (MBB.front().isPHI())
+ LowerPHINode(MBB, LastPHIIt);
+
+ return true;
+}
+
+/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs.
+/// This includes registers with no defs.
+static bool isImplicitlyDefined(unsigned VirtReg,
+ const MachineRegisterInfo *MRI) {
+ for (MachineInstr &DI : MRI->def_instructions(VirtReg))
+ if (!DI.isImplicitDef())
+ return false;
+ return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+ const MachineRegisterInfo *MRI) {
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI))
+ return false;
+ return true;
+}
+
+
+/// LowerPHINode - Lower the PHI node at the top of the specified block.
+///
+void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator LastPHIIt) {
+ ++NumLowered;
+
+ MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);
+
+ // Unlink the PHI node from the basic block, but don't delete the PHI yet.
+ MachineInstr *MPhi = MBB.remove(&*MBB.begin());
+
+ unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+ unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+ bool isDead = MPhi->getOperand(0).isDead();
+
+ // Create a new register for the incoming PHI arguments.
+ MachineFunction &MF = *MBB.getParent();
+ unsigned IncomingReg = 0;
+ bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
+
+ // Insert a register to register copy at the top of the current block (but
+ // after any remaining phi nodes) which copies the new incoming register
+ // into the phi node destination.
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ if (isSourceDefinedByImplicitDef(MPhi, MRI))
+ // If all sources of a PHI node are implicit_def, just emit an
+ // implicit_def instead of a copy.
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ else {
+ // Can we reuse an earlier PHI node? This only happens for critical edges,
+ // typically those created by tail duplication.
+ unsigned &entry = LoweredPHIs[MPhi];
+ if (entry) {
+ // An identical PHI node was already lowered. Reuse the incoming register.
+ IncomingReg = entry;
+ reusedIncoming = true;
+ ++NumReused;
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+ } else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+ entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+ }
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
+ }
+
+ // Update live variable information if there is any.
+ if (LV) {
+ MachineInstr &PHICopy = *std::prev(AfterPHIsIt);
+
+ if (IncomingReg) {
+ LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+ // Increment use count of the newly created virtual register.
+ LV->setPHIJoin(IncomingReg);
+
+ // When we are reusing the incoming register, it may already have been
+ // killed in this block. The old kill will also have been inserted at
+ // AfterPHIsIt, so it appears before the current PHICopy.
+ if (reusedIncoming)
+ if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+ DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
+ DEBUG(MBB.dump());
+ }
+
+ // Add information to LiveVariables to know that the incoming value is
+ // killed. Note that because the value is defined in several places (once
+ // for each incoming block), the "def" block and instruction fields for the
+ // VarInfo are not filled in.
+ LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ }
+
+ // Since we are going to be deleting the PHI node, if it is the last use of
+ // any registers, or if the value itself is dead, we need to move this
+ // information over to the new copy we just inserted.
+ LV->removeVirtualRegistersKilled(*MPhi);
+
+ // If the result is dead, update LV.
+ if (isDead) {
+ LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->removeVirtualRegisterDead(DestReg, *MPhi);
+ }
+ }
+
+ // Update LiveIntervals for the new copy or implicit def.
+ if (LIS) {
+ SlotIndex DestCopyIndex =
+ LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt));
+
+ SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
+ if (IncomingReg) {
+ // Add the region from the beginning of MBB to the copy instruction to
+ // IncomingReg's live interval.
+ LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg);
+ VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex);
+ if (!IncomingVNI)
+ IncomingVNI = IncomingLI.getNextValue(MBBStartIndex,
+ LIS->getVNInfoAllocator());
+ IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex,
+ DestCopyIndex.getRegSlot(),
+ IncomingVNI));
+ }
+
+ LiveInterval &DestLI = LIS->getInterval(DestReg);
+ assert(DestLI.begin() != DestLI.end() &&
+ "PHIs should have nonempty LiveIntervals.");
+ if (DestLI.endIndex().isDead()) {
+ // A dead PHI's live range begins and ends at the start of the MBB, but
+ // the lowered copy, which will still be dead, needs to begin and end at
+ // the copy instruction.
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex);
+ assert(OrigDestVNI && "PHI destination should be live at block entry.");
+ DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot());
+ DestLI.createDeadDef(DestCopyIndex.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ DestLI.removeValNo(OrigDestVNI);
+ } else {
+ // Otherwise, remove the region from the beginning of MBB to the copy
+ // instruction from DestReg's live interval.
+ DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
+ assert(DestVNI && "PHI destination should be live at its definition.");
+ DestVNI->def = DestCopyIndex.getRegSlot();
+ }
+ }
+
+ // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+ --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+
+ // Now loop over all of the incoming arguments, changing them to copy into the
+ // IncomingReg register in the corresponding predecessor basic block.
+ SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+ for (int i = NumSrcs - 1; i >= 0; --i) {
+ unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+ bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
+ isImplicitlyDefined(SrcReg, MRI);
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ // Get the MachineBasicBlock that is the source of this PHI operand, i.e.
+ // the predecessor block the incoming value comes from.
+ MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+ // Check to make sure we haven't already emitted the copy for this block.
+ // This can happen because PHI nodes may have multiple entries for the same
+ // basic block.
+ if (!MBBsInsertedInto.insert(&opBlock).second)
+ continue; // If the copy has already been emitted, we're done.
+
+ // Find a safe location to insert the copy; this may be the first terminator
+ // in the block (or end()).
+ MachineBasicBlock::iterator InsertPos =
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+ // Insert the copy.
+ MachineInstr *NewSrcInstr = nullptr;
+ if (!reusedIncoming && IncomingReg) {
+ if (SrcUndef) {
+ // The source register is undefined, so there is no need for a real
+ // COPY, but we still need to ensure joint dominance by defs.
+ // Insert an IMPLICIT_DEF instruction.
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF),
+ IncomingReg);
+
+ // Clean up the old implicit-def, if there even was one.
+ if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg))
+ if (DefMI->isImplicitDef())
+ ImpDefs.insert(DefMI);
+ } else {
+ NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg)
+ .addReg(SrcReg, 0, SrcSubReg);
+ }
+ }
+
+ // We only need to update the LiveVariables kill of SrcReg if this was the
+ // last PHI use of SrcReg to be lowered on this CFG edge and it is not live
+ // out of the predecessor. We can also ignore undef sources.
+ if (LV && !SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] &&
+ !LV->isLiveOut(SrcReg, opBlock)) {
+ // We want to be able to insert a kill of the register if this PHI (aka,
+ // the copy we just inserted) is the last use of the source value. Live
+ // variable analysis conservatively handles this by saying that the value
+ // is live until the end of the block the PHI entry lives in. If the value
+ // really is dead at the PHI copy, there will be no successor blocks which
+ // have the value live-in.
+
+ // Okay, if we now know that the value is not live out of the block, we
+ // can add a kill marker in this block saying that it kills the incoming
+ // value!
+
+ // In our final twist, we have to decide which instruction kills the
+ // register. In most cases this is the copy, however, terminator
+ // instructions at the end of the block may also use the value. In this
+ // case, we should mark the last such terminator as being the killing
+ // block, not the copy.
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't insert a copy this time.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = std::prev(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+ // Finally, mark it killed.
+ LV->addVirtualRegisterKilled(SrcReg, *KillInst);
+
+ // This vreg no longer lives all of the way through opBlock.
+ unsigned opBlockNum = opBlock.getNumber();
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+ }
+
+ if (LIS) {
+ if (NewSrcInstr) {
+ LIS->InsertMachineInstrInMaps(*NewSrcInstr);
+ LIS->addSegmentToEndOfBlock(IncomingReg, *NewSrcInstr);
+ }
+
+ if (!SrcUndef &&
+ !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) {
+ LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
+ SE = opBlock.succ_end(); SI != SE; ++SI) {
+ SlotIndex startIdx = LIS->getMBBStartIdx(*SI);
+ VNInfo *VNI = SrcLI.getVNInfoAt(startIdx);
+
+ // Definitions by other PHIs are not truly live-in for our purposes.
+ if (VNI && VNI->def != startIdx) {
+ isLiveOut = true;
+ break;
+ }
+ }
+
+ if (!isLiveOut) {
+ MachineBasicBlock::iterator KillInst = opBlock.end();
+ MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
+ for (MachineBasicBlock::iterator Term = FirstTerm;
+ Term != opBlock.end(); ++Term) {
+ if (Term->readsRegister(SrcReg))
+ KillInst = Term;
+ }
+
+ if (KillInst == opBlock.end()) {
+ // No terminator uses the register.
+
+ if (reusedIncoming || !IncomingReg) {
+ // We may have to rewind a bit if we didn't just insert a copy.
+ KillInst = FirstTerm;
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
+ break;
+ }
+ } else {
+ // We just inserted this copy.
+ KillInst = std::prev(InsertPos);
+ }
+ }
+ assert(KillInst->readsRegister(SrcReg) &&
+ "Cannot find kill instruction");
+
+ SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst);
+ SrcLI.removeSegment(LastUseIndex.getRegSlot(),
+ LIS->getMBBEndIdx(&opBlock));
+ }
+ }
+ }
+ }
+
+ // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+ if (reusedIncoming || !IncomingReg) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MPhi);
+ MF.DeleteMachineInstr(MPhi);
+ }
+}
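Setting the liveness bookkeeping above aside, the structural effect of lowering a single PHI can be sketched on a toy representation: one copy from a fresh join register at the top of the PHI's block, plus one copy into that join register per predecessor. Names, types and register numbers below are hypothetical.

    #include <cstdio>
    #include <vector>

    struct Incoming { unsigned SrcReg; unsigned PredBB; };

    // Print the copies that replace  %dst = PHI %src0,<bb0>, %src1,<bb1>, ...
    void lowerToyPHI(unsigned DstReg, unsigned JoinReg,
                     const std::vector<Incoming> &Srcs, unsigned PHIBlock) {
      // Copy at the top of the PHI's block, after any remaining PHIs.
      std::printf("BB#%u:  %%vreg%u = COPY %%vreg%u\n", PHIBlock, DstReg, JoinReg);
      // One copy per predecessor, placed before that block's terminator.
      for (const Incoming &In : Srcs)
        std::printf("BB#%u:  %%vreg%u = COPY %%vreg%u\n", In.PredBB, JoinReg,
                    In.SrcReg);
    }

    int main() {
      // %vreg7 = PHI %vreg4, <BB#1>, %vreg5, <BB#2>  becomes three copies
      // routed through the join register %vreg8.
      lowerToyPHI(/*DstReg=*/7, /*JoinReg=*/8, {{4, 1}, {5, 2}}, /*PHIBlock=*/3);
    }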
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, for each virtual register used in a PHI node we count the
+/// uses coming from each predecessor BB. This is used later to determine when
+/// the vreg is killed in that BB.
+///
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+ for (const auto &MBB : MF)
+ for (const auto &BBI : MBB) {
+ if (!BBI.isPHI())
+ break;
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(),
+ BBI.getOperand(i).getReg())];
+ }
+}
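A minimal illustration of what the (predecessor BB#, vreg) -> count map built above is for: LowerPHINode decrements it as it lowers each PHI, and only when the count for an edge drops to zero can the copy on that edge be treated as the last PHI use of the register there. Toy containers below, not the pass's DenseMap.

    #include <map>
    #include <utility>

    using BBVRegPair = std::pair<unsigned, unsigned>; // (pred BB#, vreg)

    int main() {
      std::map<BBVRegPair, unsigned> VRegPHIUseCount;

      // Two PHIs in the function use %vreg4 coming from BB#1.
      ++VRegPHIUseCount[{1, 4}];
      ++VRegPHIUseCount[{1, 4}];

      // Lowering the first PHI decrements the count; a PHI use of %vreg4 from
      // BB#1 remains, so no kill may be placed on that edge yet.
      --VRegPHIUseCount[{1, 4}];
      bool LastUseOnEdge = (VRegPHIUseCount[{1, 4}] == 0); // false
      return LastUseOnEdge ? 1 : 0;
    }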
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineLoopInfo *MLI) {
+ if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad())
+ return false; // Quick exit for basic blocks without PHIs.
+
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr;
+ bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // Is there a critical edge from PreMBB to MBB?
+ if (PreMBB->succ_size() == 1)
+ continue;
+
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB == &MBB && !SplitAllCriticalEdges)
+ continue;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : nullptr;
+ if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
+ continue;
+
+ // LV doesn't consider a phi use live-out, so isLiveOut only returns true
+ // when the source register is live-out for some other reason than a phi
+ // use. That means the copy we will insert in PreMBB won't be a kill, and
+ // there is a risk it may not be coalesced away.
+ //
+ // If the copy would be a kill, there is no need to split the edge.
+ bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB);
+ if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
+ continue;
+ if (ShouldSplit) {
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+ }
+
+ // If Reg is not live-in to MBB, it means it must be live-in to some
+ // other PreMBB successor, and we can avoid the interference by splitting
+ // the edge.
+ //
+ // If Reg *is* live-in to MBB, the interference is inevitable and a copy
+ // is likely to be left after coalescing. If we are looking at a loop
+ // exiting edge, split it so we won't insert code in the loop, otherwise
+ // don't bother.
+ ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB);
+
+ // Check for a loop exiting edge.
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit && !SplitAllCriticalEdges)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, *this)) {
+ DEBUG(dbgs() << "Failed to split critical edge.\n");
+ continue;
+ }
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
+ return Changed;
+}
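For reference, the edge PreMBB -> MBB is critical exactly when PreMBB has more than one successor and MBB has more than one predecessor; the succ_size() == 1 test above is the cheap half of that check, since a block containing a PHI normally has several predecessors. A toy restatement with hypothetical types:

    struct ToyBlock {
      unsigned NumSuccs;
      unsigned NumPreds;
    };

    // Copies inserted on a non-critical edge can simply be placed in one of
    // its end blocks, so only critical edges are candidates for splitting.
    bool isCriticalEdge(const ToyBlock &Pred, const ToyBlock &Succ) {
      return Pred.NumSuccs > 1 && Succ.NumPreds > 1;
    }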
+
+bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveIn() requires either LiveVariables or LiveIntervals");
+ if (LIS)
+ return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB);
+ else
+ return LV->isLiveIn(Reg, *MBB);
+}
+
+bool PHIElimination::isLiveOutPastPHIs(unsigned Reg,
+ const MachineBasicBlock *MBB) {
+ assert((LV || LIS) &&
+ "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
+ // LiveVariables considers uses in PHIs to be in the predecessor basic block,
+ // so that a register used only in a PHI is not live out of the block. In
+ // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than
+ // in the predecessor basic block, so that a register used only in a PHI is live
+ // out of the block.
+ if (LIS) {
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ for (const MachineBasicBlock *SI : MBB->successors())
+ if (LI.liveAt(LIS->getMBBStartIdx(SI)))
+ return true;
+ return false;
+ } else {
+ return LV->isLiveOut(Reg, *MBB);
+ }
+}
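A hedged restatement of the LiveIntervals branch above on a toy structure: a register counts as live out past PHIs when some successor has it live at its start with a value that is not produced by that successor's own PHI. Types and names are hypothetical.

    #include <vector>

    struct SuccLiveIn {
      bool LiveAtStart;          // register live at the successor's start index
      bool DefinedByPHIAtStart;  // that value comes from the successor's PHI
    };

    bool liveOutPastPHIs(const std::vector<SuccLiveIn> &Succs) {
      for (const SuccLiveIn &S : Succs)
        if (S.LiveAtStart && !S.DefinedByPHIAtStart)
          return true;
      return false;
    }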
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..4cabc3a8c1fd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,59 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isEHPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) {
+ if (RI.getParent() == MBB)
+ DefUsesInMBB.insert(&RI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
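A toy restatement of the landing-pad branch of the decision above: with the defs and uses of SrcReg given by their positions in the block, the copy goes at the start if there are none, otherwise just after the last of them, and never before the leading PHIs and labels. This is an illustrative simplification, not the function's real interface.

    #include <algorithm>
    #include <vector>

    // DefUsePositions: positions of SrcReg's defs/uses inside the block.
    // FirstNonPHIPos: position of the first non-PHI, non-label instruction.
    unsigned toyPHICopyInsertPos(const std::vector<unsigned> &DefUsePositions,
                                 unsigned FirstNonPHIPos) {
      unsigned Pos = 0;
      if (!DefUsePositions.empty())
        Pos = *std::max_element(DefUsePositions.begin(),
                                DefUsePositions.end()) + 1;
      return std::max(Pos, FirstNonPHIPos);  // never before PHIs/labels
    }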
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..b997d7ac5f4f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
new file mode 100644
index 000000000000..ccdaec1bc180
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -0,0 +1,99 @@
+//===-- ParallelCG.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ParallelCG.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+
+using namespace llvm;
+
+static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
+ function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
+ TargetMachine::CodeGenFileType FileType) {
+ std::unique_ptr<TargetMachine> TM = TMFactory();
+ legacy::PassManager CodeGenPasses;
+ if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType))
+ report_fatal_error("Failed to setup codegen");
+ CodeGenPasses.run(*M);
+}
+
+std::unique_ptr<Module> llvm::splitCodeGen(
+ std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
+ ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
+ const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
+ TargetMachine::CodeGenFileType FileType, bool PreserveLocals) {
+ assert(BCOSs.empty() || BCOSs.size() == OSs.size());
+
+ if (OSs.size() == 1) {
+ if (!BCOSs.empty())
+ WriteBitcodeToFile(M.get(), *BCOSs[0]);
+ codegen(M.get(), *OSs[0], TMFactory, FileType);
+ return M;
+ }
+
+ // Create ThreadPool in nested scope so that threads will be joined
+ // on destruction.
+ {
+ ThreadPool CodegenThreadPool(OSs.size());
+ int ThreadCount = 0;
+
+ SplitModule(
+ std::move(M), OSs.size(),
+ [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the
+ // codegen. We do it by serializing partition modules to bitcode
+ // (while still on the main thread, in order to avoid data races) and
+ // spinning up new threads which deserialize the partitions into
+ // separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallString<0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(MPart.get(), BCOS);
+
+ if (!BCOSs.empty()) {
+ BCOSs[ThreadCount]->write(BC.begin(), BC.size());
+ BCOSs[ThreadCount]->flush();
+ }
+
+ llvm::raw_pwrite_stream *ThreadOS = OSs[ThreadCount++];
+ // Enqueue the task
+ CodegenThreadPool.async(
+ [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
+ LLVMContext Ctx;
+ ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ MemoryBufferRef(StringRef(BC.data(), BC.size()),
+ "<split-module>"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+
+ codegen(MPartInCtx.get(), *ThreadOS, TMFactory, FileType);
+ },
+ // Pass BC using std::move to ensure that it gets moved rather than
+ // copied into the thread's context.
+ std::move(BC));
+ },
+ PreserveLocals);
+ }
+
+ return {};
+}
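A hedged sketch of a call site for splitCodeGen(), matching the signature defined above: one partition is code-generated per output stream, and no bitcode copies are requested. The wrapper name, the stream setup and the TargetMachine factory are assumptions of this sketch, not part of the patch.

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/ParallelCG.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Target/TargetMachine.h"
    #include <functional>
    #include <memory>

    std::unique_ptr<llvm::Module>
    emitInParallel(std::unique_ptr<llvm::Module> M,
                   llvm::ArrayRef<llvm::raw_pwrite_stream *> ObjStreams,
                   std::function<std::unique_ptr<llvm::TargetMachine>()> MakeTM) {
      // Emit an object file into each stream; partitions run on a thread pool.
      return llvm::splitCodeGen(std::move(M), ObjStreams, /*BCOSs=*/{}, MakeTM,
                                llvm::TargetMachine::CGFT_ObjectFile,
                                /*PreserveLocals=*/false);
    }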
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
new file mode 100644
index 000000000000..32468c90b864
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -0,0 +1,88 @@
+//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file edits function bodies in place to support the
+// "patchable-function" attribute.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct PatchableFunction : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ PatchableFunction() : MachineFunctionPass(ID) {
+ initializePatchableFunctionPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+};
+}
+
+/// Returns true if instruction \p MI will not result in actual machine code
+/// instructions.
+static bool doesNotGeneratecode(const MachineInstr &MI) {
+ // TODO: Introduce an MCInstrDesc flag for this
+ switch (MI.getOpcode()) {
+ default: return false;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return true;
+ }
+}
+
+bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction()->hasFnAttribute("patchable-function"))
+ return false;
+
+#ifndef NDEBUG
+ Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function");
+ StringRef PatchType = PatchAttr.getValueAsString();
+ assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
+#endif
+
+ auto &FirstMBB = *MF.begin();
+ MachineBasicBlock::iterator FirstActualI = FirstMBB.begin();
+ for (; doesNotGeneratecode(*FirstActualI); ++FirstActualI)
+ assert(FirstActualI != FirstMBB.end());
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_OP))
+ .addImm(2)
+ .addImm(FirstActualI->getOpcode());
+
+ for (auto &MO : FirstActualI->operands())
+ MIB.addOperand(MO);
+
+ FirstActualI->eraseFromParent();
+ MF.ensureAlignment(4);
+ return true;
+}
+
+char PatchableFunction::ID = 0;
+char &llvm::PatchableFunctionID = PatchableFunction::ID;
+INITIALIZE_PASS(PatchableFunction, "patchable-function",
+ "Implement the 'patchable-function' attribute", false, false)
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 000000000000..60b27dd75a89
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,1947 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the results.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction already sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+// Another instance, in this code:
+//
+// sub r1, r3 | sub r1, imm
+// cmp r3, r1 or cmp r1, r3 | cmp r1, imm
+// bge L1
+//
+// If the branch instruction can use the flag from "sub", then we can replace
+// "sub" with "subs" and eliminate the "cmp" instruction.
+//
+// - Optimize Loads:
+//
+// Loads that can be folded into a later instruction. A load is foldable
+// if it loads to a virtual register and the virtual register defined has
+// a single use.
+//
+// - Optimize Copies and Bitcast (more generally, target specific copies):
+//
+// Rewrite copies and bitcasts to avoid cross register bank copies
+// when possible.
+// E.g., consider the following example, where capital and lower-case
+// letters denote different register files:
+// b = copy A <-- cross-bank copy
+// C = copy b <-- cross-bank copy
+// =>
+// b = copy A <-- cross-bank copy
+// C = copy A <-- same-bank copy
+//
+// E.g., for bitcast:
+// b = bitcast A <-- cross-bank copy
+// C = bitcast b <-- cross-bank copy
+// =>
+// b = bitcast A <-- cross-bank copy
+// C = copy A <-- same-bank copy
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "peephole-opt"
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
+static cl::opt<bool>
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable advanced copy optimization"));
+
+static cl::opt<bool> DisableNAPhysCopyOpt(
+ "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
+ cl::desc("Disable non-allocatable physical register copy optimization"));
+
+// Limit the number of PHI instructions to process
+// in PeepholeOptimizer::getNextSource.
+static cl::opt<unsigned> RewritePHILimit(
+ "rewrite-phi-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the length of PHI chains to lookup"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumCmps, "Number of compares eliminated");
+STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");
+STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
+STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
+STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
+
+namespace {
+ class ValueTrackerResult;
+
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ /// \brief Track Def -> Use info used for rewriting copies.
+ typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult>
+ RewriteMapTy;
+
+ private:
+ bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool optimizeCondBranch(MachineInstr *MI);
+ bool optimizeCoalescableCopy(MachineInstr *MI);
+ bool optimizeUncoalescableCopy(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs);
+ bool findNextSource(unsigned Reg, unsigned SubReg,
+ RewriteMapTy &RewriteMap);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+
+ /// \brief If copy instruction \p MI is a virtual register copy, track it in
+ /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
+ /// previously seen as a copy, replace the uses of this copy with the
+ /// previously seen copy's destination register.
+ bool foldRedundantCopy(MachineInstr *MI,
+ SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs);
+
+ /// \brief Is the register \p Reg a non-allocatable physical register?
+ bool isNAPhysCopy(unsigned Reg);
+
+ /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical
+ /// register copy, track it in the \p NAPhysToVirtMIs map. If this
+ /// non-allocatable physical register was previously copied to a virtual
+ /// register and hasn't been clobbered, the virt->phys copy can be
+ /// deleted.
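+ /// For example (illustrative; %NA_PHYSREG stands in for any non-allocatable
+ /// physical register and is not a name used in this file):
+ ///   %v0 = COPY %NA_PHYSREG
+ ///   ...                          ; no clobber of %NA_PHYSREG in between
+ ///   %NA_PHYSREG = COPY %v0       ; <-- this copy can be deleted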
+ bool foldRedundantNAPhysCopy(
+ MachineInstr *MI,
+ DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
+
+ bool isLoadFoldable(MachineInstr *MI,
+ SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
+
+ /// \brief Check whether \p MI is understood by the register coalescer
+ /// but may require some rewriting.
+ bool isCoalescableCopy(const MachineInstr &MI) {
+ // SubregToRegs are not interesting, because they are already register
+ // coalescer friendly.
+ return MI.isCopy() || (!DisableAdvCopyOpt &&
+ (MI.isRegSequence() || MI.isInsertSubreg() ||
+ MI.isExtractSubreg()));
+ }
+
+ /// \brief Check whether \p MI is a copy like instruction that is
+ /// not recognized by the register coalescer.
+ bool isUncoalescableCopy(const MachineInstr &MI) {
+ return MI.isBitcast() ||
+ (!DisableAdvCopyOpt &&
+ (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()));
+ }
+ };
+
+ /// \brief Helper class to hold a reply for ValueTracker queries. Contains the
+ /// returned sources for a given search and the instructions where the sources
+ /// were tracked from.
+ class ValueTrackerResult {
+ private:
+ /// Track all sources found by one ValueTracker query.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs;
+
+ /// Instruction using the sources in 'RegSrcs'.
+ const MachineInstr *Inst;
+
+ public:
+ ValueTrackerResult() : Inst(nullptr) {}
+ ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) {
+ addSource(Reg, SubReg);
+ }
+
+ bool isValid() const { return getNumSources() > 0; }
+
+ void setInst(const MachineInstr *I) { Inst = I; }
+ const MachineInstr *getInst() const { return Inst; }
+
+ void clear() {
+ RegSrcs.clear();
+ Inst = nullptr;
+ }
+
+ void addSource(unsigned SrcReg, unsigned SrcSubReg) {
+ RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg));
+ }
+
+ void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
+ assert(Idx < getNumSources() && "Reg pair source out of index");
+ RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg);
+ }
+
+ int getNumSources() const { return RegSrcs.size(); }
+
+ unsigned getSrcReg(int Idx) const {
+ assert(Idx < getNumSources() && "Reg source out of index");
+ return RegSrcs[Idx].Reg;
+ }
+
+ unsigned getSrcSubReg(int Idx) const {
+ assert(Idx < getNumSources() && "SubReg source out of index");
+ return RegSrcs[Idx].SubReg;
+ }
+
+ bool operator==(const ValueTrackerResult &Other) {
+ if (Other.getInst() != getInst())
+ return false;
+
+ if (Other.getNumSources() != getNumSources())
+ return false;
+
+ for (int i = 0, e = Other.getNumSources(); i != e; ++i)
+ if (Other.getSrcReg(i) != getSrcReg(i) ||
+ Other.getSrcSubReg(i) != getSrcSubReg(i))
+ return false;
+ return true;
+ }
+ };
+
+ /// \brief Helper class to track the possible sources of a value defined by
+ /// a (chain of) copy related instructions.
+ /// Given a definition (instruction and definition index), this class
+ /// follows the use-def chain to find successive suitable sources.
+ /// The given source can be used to rewrite the definition into
+ /// def = COPY src.
+ ///
+ /// For instance, let us consider the following snippet:
+ /// v0 =
+ /// v2 = INSERT_SUBREG v1, v0, sub0
+ /// def = COPY v2.sub0
+ ///
+ /// Using a ValueTracker for def = COPY v2.sub0 will give the following
+ /// suitable sources:
+ /// v2.sub0 and v0.
+ /// Then, def can be rewritten into def = COPY v0.
+ class ValueTracker {
+ private:
+ /// The current point into the use-def chain.
+ const MachineInstr *Def;
+ /// The index of the definition in Def.
+ unsigned DefIdx;
+ /// The sub register index of the definition.
+ unsigned DefSubReg;
+ /// The register where the value can be found.
+ unsigned Reg;
+ /// Specify whether or not the value tracking looks through
+ /// complex instructions. When this is false, the value tracker
+ /// bails on everything that is not a copy or a bitcast.
+ ///
+ /// Note: This could have been implemented as a specialized version of
+ /// the ValueTracker class but that would have complicated the code of
+ /// the users of this class.
+ bool UseAdvancedTracking;
+ /// MachineRegisterInfo used to perform tracking.
+ const MachineRegisterInfo &MRI;
+ /// Optional TargetInstrInfo used to perform some complex
+ /// tracking.
+ const TargetInstrInfo *TII;
+
+ /// \brief Dispatcher to the right underlying implementation of
+ /// getNextSource.
+ ValueTrackerResult getNextSourceImpl();
+ /// \brief Specialized version of getNextSource for Copy instructions.
+ ValueTrackerResult getNextSourceFromCopy();
+ /// \brief Specialized version of getNextSource for Bitcast instructions.
+ ValueTrackerResult getNextSourceFromBitcast();
+ /// \brief Specialized version of getNextSource for RegSequence
+ /// instructions.
+ ValueTrackerResult getNextSourceFromRegSequence();
+ /// \brief Specialized version of getNextSource for InsertSubreg
+ /// instructions.
+ ValueTrackerResult getNextSourceFromInsertSubreg();
+ /// \brief Specialized version of getNextSource for ExtractSubreg
+ /// instructions.
+ ValueTrackerResult getNextSourceFromExtractSubreg();
+ /// \brief Specialized version of getNextSource for SubregToReg
+ /// instructions.
+ ValueTrackerResult getNextSourceFromSubregToReg();
+ /// \brief Specialized version of getNextSource for PHI instructions.
+ ValueTrackerResult getNextSourceFromPHI();
+
+ public:
+ /// \brief Create a ValueTracker instance for the value defined by \p Reg.
+ /// \p DefSubReg represents the sub register index the value tracker will
+ /// track. It does not need to match the sub register index used in the
+ /// definition of \p Reg.
+ /// \p UseAdvancedTracking specifies whether or not the value tracker looks
+ /// through complex instructions. By default (false), it handles only copy
+ /// and bitcast instructions.
+ /// If \p Reg is a physical register, a value tracker constructed with
+ /// this constructor will not find any alternative source.
+ /// Indeed, when \p Reg is a physical register that constructor does not
+ /// know which definition of \p Reg it should track.
+ /// Use the next constructor to track a physical register.
+ ValueTracker(unsigned Reg, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
+ bool UseAdvancedTracking = false,
+ const TargetInstrInfo *TII = nullptr)
+ : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
+ }
+ }
+
+ /// \brief Create a ValueTracker instance for the value defined by
+ /// the pair \p MI, \p DefIdx.
+ /// Unlike the other constructor, the value tracker produced by this one
+ /// may be able to find a new source when the definition is a physical
+ /// register.
+ /// This could be useful to rewrite target specific instructions into
+ /// generic copy instructions.
+ ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
+ const MachineRegisterInfo &MRI,
+ bool UseAdvancedTracking = false,
+ const TargetInstrInfo *TII = nullptr)
+ : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
+ assert(DefIdx < Def->getDesc().getNumDefs() &&
+ Def->getOperand(DefIdx).isReg() && "Invalid definition");
+ Reg = Def->getOperand(DefIdx).getReg();
+ }
+
+ /// \brief Following the use-def chain, get the next available source
+ /// for the tracked value.
+ /// \return A ValueTrackerResult containing a set of registers
+ /// and sub registers with tracked values. A ValueTrackerResult with
+ /// an empty set of registers means no source was found.
+ ValueTrackerResult getNextSource();
+
+ /// \brief Get the last register where the initial value can be found.
+ /// Initially this is the register of the definition.
+ /// Then, after each successful call to getNextSource, this is the
+ /// register of the last source.
+ unsigned getReg() const { return Reg; }
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
+ "Peephole Optimizations", false, false)
+
+/// If the instruction is a copy-like instruction, i.e. it reads a single register
+/// and writes a single register and it does not modify the source, and if the
+/// source value is preserved as a sub-register of the result, then replace all
+/// reachable uses of the source with the subreg of the result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
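+///
+/// For example (an illustrative, X86-flavoured sketch; the opcode and
+/// sub-register names below are assumptions, not taken from this file):
+///   %v1 = MOVSX64rr32 %v0     ; %v0 is still available as %v1.sub_32bit
+///   ...  = use %v0
+/// can become
+///   %v2 = COPY %v1.sub_32bit
+///   ...  = use %v2
+/// so uses of the pre-extension value can be coalesced with the extension
+/// result.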
+bool PeepholeOptimizer::
+optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ if (MRI->hasOneNonDBGUse(SrcReg))
+ // No other uses.
+ return false;
+
+ // Ensure DstReg can get a register class that actually supports
+ // sub-registers. Don't change the class until we commit.
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
+ if (!DstRC)
+ return false;
+
+ // The ext instr may be operating on a sub-register of SrcReg as well.
+ // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
+ // register.
+ // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
+ // SrcReg:SubIdx should be replaced.
+ bool UseSrcSubIdx =
+ TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
+
+ // The source has other uses. See if we can replace the other uses with uses
+ // of the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
+ ReachedBBs.insert(UI.getParent());
+
+ // Uses that are in the same BB as uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // Only accept uses of SrcReg:SubIdx.
+ if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
+ continue;
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ Uses.append(ExtendedUses.begin(), ExtendedUses.end());
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result; we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions
+ // downstream. A PHI use is expected to be the kill of its source values.
+ for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
+ if (UI.isPHI())
+ PHIBBs.insert(UI.getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed) {
+ MRI->clearKillFlags(DstReg);
+ MRI->constrainRegClass(DstReg, DstRC);
+ }
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+ // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
+ if (UseSrcSubIdx) {
+ Copy->getOperand(0).setSubReg(SubIdx);
+ Copy->getOperand(0).setIsUndef();
+ }
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// If the instruction is a compare and the previous instruction it's comparing
+/// against already sets (or could be modified to set) the same flag as the
+/// compare, then we can remove the comparison and use the flag from the
+/// previous instruction.
+bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg, SrcReg2;
+ int CmpMask, CmpValue;
+ if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
+ return false;
+
+ // Attempt to optimize the comparison instruction.
+ if (TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ ++NumCmps;
+ return true;
+ }
+
+ return false;
+}
+
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
+ SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(*MI, LocalMIs))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// \brief Check if a simpler conditional branch can be
+/// generated.
+bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
+ return TII->optimizeCondBranch(*MI);
+}
+
+/// \brief Try to find the next source that shares the same register file
+/// for the value defined by \p Reg and \p SubReg.
+/// When true is returned, the \p RewriteMap can be used by the client to
+/// retrieve all Def -> Use entries along the way up to the next source. Any
+/// found Use that is not itself a key for another entry is the next source to
+/// use. During the search for the next source, multiple sources can be found,
+/// given multiple incoming sources of a PHI instruction. In this case, we
+/// look in each PHI source for the next source; all found next sources must
+/// share the same register file as \p Reg and \p SubReg. The client should
+/// then be capable of rewriting all intermediate PHIs to get the next source.
+/// \return False if no alternative sources are available. True otherwise.
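+///
+/// For example (illustrative), given the chain:
+///   v0 = ...
+///   v1 = COPY v0
+///   def = COPY v1
+/// a query for def fills \p RewriteMap with def -> v1 and v1 -> v0; v0 is not
+/// itself a key, so it is the next source, assuming its register class makes
+/// the rewrite profitable (see TargetRegisterInfo::shouldRewriteCopySrc).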
+bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg,
+ RewriteMapTy &RewriteMap) {
+ // Do not try to find a new source for a physical register.
+ // So far we do not have any motivating example for doing that.
+ // Thus, instead of maintaining untested code, we will revisit that if
+ // that changes at some point.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return false;
+ const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
+
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook;
+ TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg);
+ SrcToLook.push_back(CurSrcPair);
+
+ unsigned PHICount = 0;
+ while (!SrcToLook.empty() && PHICount < RewritePHILimit) {
+ TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val();
+ // As explained above, do not handle physical registers
+ if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg))
+ return false;
+
+ CurSrcPair = Pair;
+ ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI,
+ !DisableAdvCopyOpt, TII);
+ ValueTrackerResult Res;
+ bool ShouldRewrite = false;
+
+ do {
+ // Follow the chain of copies until we reach the top of the use-def chain
+ // or find a more suitable source.
+ Res = ValTracker.getNextSource();
+ if (!Res.isValid())
+ break;
+
+ // Insert the Def -> Use entry for the recently found source.
+ ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
+ if (CurSrcRes.isValid()) {
+ assert(CurSrcRes == Res && "ValueTrackerResult found must match");
+ // An existing entry with multiple sources is a PHI cycle we must avoid.
+ // Otherwise it's an entry with a valid next source we already found.
+ if (CurSrcRes.getNumSources() > 1) {
+ DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n");
+ return false;
+ }
+ break;
+ }
+ RewriteMap.insert(std::make_pair(CurSrcPair, Res));
+
+ // A ValueTrackerResult usually has one source unless it's the result of
+ // a PHI instruction. Add the found PHI edges to be looked up further.
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs > 1) {
+ PHICount++;
+ for (unsigned i = 0; i < NumSrcs; ++i)
+ SrcToLook.push_back(TargetInstrInfo::RegSubRegPair(
+ Res.getSrcReg(i), Res.getSrcSubReg(i)));
+ break;
+ }
+
+ CurSrcPair.Reg = Res.getSrcReg(0);
+ CurSrcPair.SubReg = Res.getSrcSubReg(0);
+ // Do not extend the live-ranges of physical registers as they add
+ // constraints to the register allocator. Moreover, if we want to extend
+ // the live-range of a physical register, unlike an SSA virtual register,
+ // we would have to check that it isn't redefined before the related use.
+ if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ return false;
+
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
+ ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC,
+ CurSrcPair.SubReg);
+ } while (!ShouldRewrite);
+
+ // Continue looking for new sources...
+ if (Res.isValid())
+ continue;
+
+ // Do not continue searching for a new source if there's at least
+ // one use-def which cannot be rewritten.
+ if (!ShouldRewrite)
+ return false;
+ }
+
+ if (PHICount >= RewritePHILimit) {
+ DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
+ return false;
+ }
+
+ // If we did not find a more suitable source, there is nothing to optimize.
+ return CurSrcPair.Reg != Reg;
+}
+
+/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are
+/// guaranteed to have the same register class. This is necessary whenever we
+/// successfully traverse a PHI instruction and find suitable sources coming
+/// from its edges. By inserting a new PHI, we provide a rewritten PHI def
+/// suitable to be used in a new COPY instruction.
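+///
+/// For example (illustrative), if \p OrigPHI is
+///   %v3 = PHI %v1, <BB#0>, %v2, <BB#1>
+/// and the rewritten sources are %a (for the <BB#0> edge) and %b (for the
+/// <BB#1> edge), the new instruction
+///   %new = PHI %a, <BB#0>, %b, <BB#1>
+/// is inserted right before \p OrigPHI and %new can then feed a COPY.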
+static MachineInstr *
+insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs,
+ MachineInstr *OrigPHI) {
+ assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
+
+ const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg);
+ unsigned NewVR = MRI->createVirtualRegister(NewRC);
+ MachineBasicBlock *MBB = OrigPHI->getParent();
+ MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(),
+ TII->get(TargetOpcode::PHI), NewVR);
+
+ unsigned MBBOpIdx = 2;
+ for (auto RegPair : SrcRegs) {
+ MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
+ MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB());
+ // Since we're extending the lifetime of RegPair.Reg, clear the
+ // kill flags to account for that and to make sure RegPair.Reg reaches
+ // the new PHI.
+ MRI->clearKillFlags(RegPair.Reg);
+ MBBOpIdx += 2;
+ }
+
+ return MIB;
+}
+
+namespace {
+/// \brief Helper class to rewrite the arguments of a copy-like instruction.
+class CopyRewriter {
+protected:
+ /// The copy-like instruction.
+ MachineInstr &CopyLike;
+ /// The index of the source being rewritten.
+ unsigned CurrentSrcIdx;
+
+public:
+ CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
+
+ virtual ~CopyRewriter() {}
+
+ /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
+ /// the related value that it affects (TrackReg, TrackSubReg).
+ /// A source is considered rewritable if its register class and the
+ /// register class of the related TrackReg may not be register
+ /// coalescer friendly. In other words, given a copy-like instruction,
+ /// not all of its arguments may be returned as rewritable sources, since
+ /// some arguments are known to be register coalescer friendly.
+ ///
+ /// Each call of this method moves the current source to the next
+ /// rewritable source.
+ /// For instance, let CopyLike be the instruction to rewrite.
+ /// CopyLike has one definition and one source:
+ /// dst.dstSubIdx = CopyLike src.srcSubIdx.
+ ///
+ /// The first call will give the first rewritable source, i.e.,
+ /// the only source this instruction has:
+ /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
+ /// This source defines the whole definition, i.e.,
+ /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
+ ///
+ /// The second and subsequent calls will return false, as there is only one
+ /// rewritable source.
+ ///
+ /// \return True if a rewritable source has been found, false otherwise.
+ /// The output arguments are valid if and only if true is returned.
+ virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) {
+ // If CurrentSrcIdx == 1, this means this function has already been called
+ // once. CopyLike has one definition and one argument, thus, there is
+ // nothing else to rewrite.
+ if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
+ return false;
+ // This is the first call to getNextRewritableSource.
+ // Move the CurrentSrcIdx to remember that we made that call.
+ CurrentSrcIdx = 1;
+ // The rewritable source is the argument.
+ const MachineOperand &MOSrc = CopyLike.getOperand(1);
+ SrcReg = MOSrc.getReg();
+ SrcSubReg = MOSrc.getSubReg();
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+ return true;
+ }
+
+ /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
+ /// if possible.
+ /// \return True if the rewriting was possible, false otherwise.
+ virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
+ if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
+ return false;
+ MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
+ MOSrc.setReg(NewReg);
+ MOSrc.setSubReg(NewSubReg);
+ return true;
+ }
+
+ /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find
+ /// the new source to use for rewrite. If \p HandleMultipleSources is true and
+ /// multiple sources for a given \p Def are found along the way, this means a
+ /// PHI instruction needs to be rewritten.
+ /// TODO: HandleMultipleSources should be removed once we test PHI handling
+ /// with coalescable copies.
+ TargetInstrInfo::RegSubRegPair
+ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap,
+ bool HandleMultipleSources = true) {
+
+ TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
+ do {
+ ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
+ // If there are no entries on the map, LookupSrc is the new source.
+ if (!Res.isValid())
+ return LookupSrc;
+
+ // There's only one source for this definition, keep searching...
+ unsigned NumSrcs = Res.getNumSources();
+ if (NumSrcs == 1) {
+ LookupSrc.Reg = Res.getSrcReg(0);
+ LookupSrc.SubReg = Res.getSrcSubReg(0);
+ continue;
+ }
+
+ // TODO: Remove once multiple srcs w/ coalescable copies are supported.
+ if (!HandleMultipleSources)
+ break;
+
+ // Multiple sources, recurse into each source to find a new source
+ // for it. Then, rewrite the PHI accordingly to its new edges.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs;
+ for (unsigned i = 0; i < NumSrcs; ++i) {
+ TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i),
+ Res.getSrcSubReg(i));
+ NewPHISrcs.push_back(
+ getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
+ }
+
+ // Build the new PHI node and return its def register as the new source.
+ MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst());
+ MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI);
+ DEBUG(dbgs() << "-- getNewSource\n");
+ DEBUG(dbgs() << " Replacing: " << *OrigPHI);
+ DEBUG(dbgs() << " With: " << *NewPHI);
+ const MachineOperand &MODef = NewPHI->getOperand(0);
+ return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
+
+ } while (1);
+
+ return TargetInstrInfo::RegSubRegPair(0, 0);
+ }
+
+ /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
+ /// and create a new COPY instruction. More info about RewriteMap in
+ /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+ /// uncoalescable copies, since they are copy-like instructions that aren't
+ /// recognized by the register allocator.
+ virtual MachineInstr *
+ RewriteSource(TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap) {
+ return nullptr;
+ }
+};
+
+/// \brief Helper class to rewrite uncoalescable copy like instructions
+/// into new COPY (coalescable friendly) instructions.
+class UncoalescableRewriter : public CopyRewriter {
+protected:
+ const TargetInstrInfo &TII;
+ MachineRegisterInfo &MRI;
+ /// The number of defs in the bitcast
+ unsigned NumDefs;
+
+public:
+ UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI)
+ : CopyRewriter(MI), TII(TII), MRI(MRI) {
+ NumDefs = MI.getDesc().getNumDefs();
+ }
+
+ /// \brief Get the next rewritable def source (TrackReg, TrackSubReg).
+ /// All such sources need to be considered rewritable in order to
+ /// rewrite an uncoalescable copy-like instruction. This method returns
+ /// each definition that must be checked if it is rewritable.
+ ///
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // Find the next non-dead definition and continue from there.
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+
+ while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
+ ++CurrentSrcIdx;
+ if (CurrentSrcIdx == NumDefs)
+ return false;
+ }
+
+ // What we track are the alternative sources of the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+
+ CurrentSrcIdx++;
+ return true;
+ }
+
+ /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap
+ /// and create a new COPY instruction. More info about RewriteMap in
+ /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
+ /// uncoalescable copies, since they are copy-like instructions that aren't
+ /// recognized by the register allocator.
+ MachineInstr *
+ RewriteSource(TargetInstrInfo::RegSubRegPair Def,
+ PeepholeOptimizer::RewriteMapTy &RewriteMap) override {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ "We do not rewrite physical registers");
+
+ // Find the new source to use in the COPY rewrite.
+ TargetInstrInfo::RegSubRegPair NewSrc =
+ getNewSource(&MRI, &TII, Def, RewriteMap);
+
+ // Insert the COPY.
+ const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg);
+ unsigned NewVR = MRI.createVirtualRegister(DefRC);
+
+ MachineInstr *NewCopy =
+ BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewVR)
+ .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
+
+ NewCopy->getOperand(0).setSubReg(Def.SubReg);
+ if (Def.SubReg)
+ NewCopy->getOperand(0).setIsUndef();
+
+ DEBUG(dbgs() << "-- RewriteSource\n");
+ DEBUG(dbgs() << " Replacing: " << CopyLike);
+ DEBUG(dbgs() << " With: " << *NewCopy);
+ MRI.replaceRegWith(Def.Reg, NewVR);
+ MRI.clearKillFlags(NewVR);
+
+ // We extended the lifetime of NewSrc.Reg, clear the kill flags to
+ // account for that.
+ MRI.clearKillFlags(NewSrc.Reg);
+
+ return NewCopy;
+ }
+};
+
+/// \brief Specialized rewriter for INSERT_SUBREG instruction.
+class InsertSubregRewriter : public CopyRewriter {
+public:
+ InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
+ assert(MI.isInsertSubreg() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
+ /// Src1 has the same register class as dst, hence, there is
+ /// nothing to rewrite.
+ /// Src2.src2SubIdx may not be register coalescer friendly.
+ /// Therefore, the first call to this method returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx).
+ ///
+ /// Subsequent calls will return false.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // If we already get the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 2)
+ return false;
+ // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
+ CurrentSrcIdx = 2;
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
+ SrcReg = MOInsertedReg.getReg();
+ SrcSubReg = MOInsertedReg.getSubReg();
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+
+ // We want to track something that is compatible with the
+ // partial definition.
+ TrackReg = MODef.getReg();
+ if (MODef.getSubReg())
+ // Bail if we have to compose sub-register indices.
+ return false;
+ TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
+ return true;
+ }
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ if (CurrentSrcIdx != 2)
+ return false;
+ // We are rewriting the inserted reg.
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
+class ExtractSubregRewriter : public CopyRewriter {
+ const TargetInstrInfo &TII;
+
+public:
+ ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
+ : CopyRewriter(MI), TII(TII) {
+ assert(MI.isExtractSubreg() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
+ /// There is only one rewritable source: Src.subIdx,
+ /// which defines dst.dstSubIdx.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // If we already get the only source we can rewrite, return false.
+ if (CurrentSrcIdx == 1)
+ return false;
+ // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
+ CurrentSrcIdx = 1;
+ const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
+ SrcReg = MOExtractedReg.getReg();
+ // If we have to compose sub-register indices, bail out.
+ if (MOExtractedReg.getSubReg())
+ return false;
+
+ SrcSubReg = CopyLike.getOperand(2).getImm();
+
+ // We want to track something that is compatible with the definition.
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ TrackSubReg = MODef.getSubReg();
+ return true;
+ }
+
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ // The only source we can rewrite is the input register.
+ if (CurrentSrcIdx != 1)
+ return false;
+
+ CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
+
+ // If we find a source that does not require extracting something,
+ // rewrite the operation with a copy.
+ if (!NewSubReg) {
+ // Move the current index to an invalid position.
+ // We do not want another call to this method to be able
+ // to do any change.
+ CurrentSrcIdx = -1;
+ // Rewrite the operation as a COPY.
+ // Get rid of the sub-register index.
+ CopyLike.RemoveOperand(2);
+ // Morph the operation into a COPY.
+ CopyLike.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ }
+ CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
+ return true;
+ }
+};
+
+/// \brief Specialized rewriter for REG_SEQUENCE instruction.
+class RegSequenceRewriter : public CopyRewriter {
+public:
+ RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
+ assert(MI.isRegSequence() && "Invalid instruction");
+ }
+
+ /// \brief See CopyRewriter::getNextRewritableSource.
+ /// Here CopyLike has the following form:
+ /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
+ /// Each call will return a different source, walking all the available
+ /// sources.
+ ///
+ /// The first call returns:
+ /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx1).
+ ///
+ /// The second call returns:
+ /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx2).
+ ///
+ /// And so on, until all the sources have been traversed, then
+ /// it returns false.
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
+ unsigned &TrackReg,
+ unsigned &TrackSubReg) override {
+ // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
+
+ // If this is the first call, move to the first argument.
+ if (CurrentSrcIdx == 0) {
+ CurrentSrcIdx = 1;
+ } else {
+ // Otherwise, move to the next argument and check that it is valid.
+ CurrentSrcIdx += 2;
+ if (CurrentSrcIdx >= CopyLike.getNumOperands())
+ return false;
+ }
+ const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
+ SrcReg = MOInsertedReg.getReg();
+ // If we have to compose sub-register indices, bail out.
+ if ((SrcSubReg = MOInsertedReg.getSubReg()))
+ return false;
+
+ // We want to track something that is compatible with the related
+ // partial definition.
+ TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
+
+ const MachineOperand &MODef = CopyLike.getOperand(0);
+ TrackReg = MODef.getReg();
+ // If we have to compose sub-registers, bail.
+ return MODef.getSubReg() == 0;
+ }
+
+ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ // We cannot rewrite out of bound operands.
+ // Moreover, rewritable sources are at odd positions.
+ if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
+ return false;
+
+ MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
+ MO.setReg(NewReg);
+ MO.setSubReg(NewSubReg);
+ return true;
+ }
+};
+} // End namespace.
+
+/// \brief Get the appropriate CopyRewriter for \p MI.
+/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
+/// if no rewriter works for \p MI.
+static CopyRewriter *getCopyRewriter(MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
+ // Handle uncoalescable copy-like instructions.
+ if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
+ MI.isExtractSubregLike()))
+ return new UncoalescableRewriter(MI, TII, MRI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return nullptr;
+ case TargetOpcode::COPY:
+ return new CopyRewriter(MI);
+ case TargetOpcode::INSERT_SUBREG:
+ return new InsertSubregRewriter(MI);
+ case TargetOpcode::EXTRACT_SUBREG:
+ return new ExtractSubregRewriter(MI, TII);
+ case TargetOpcode::REG_SEQUENCE:
+ return new RegSequenceRewriter(MI);
+ }
+ llvm_unreachable(nullptr);
+}
+
+/// \brief Optimize generic copy instructions to avoid cross
+/// register bank copy. The optimization looks through a chain of
+/// copies and tries to find a source that has a compatible register
+/// class.
+/// Two register classes are considered to be compatible if they share
+/// the same register bank.
+/// New copies issued by this optimization are register allocator
+/// friendly. This optimization does not remove any copy as it may
+/// overconstrain the register allocator, but replaces some operands
+/// when possible.
+/// \pre isCoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been rewritten. False otherwise.
+bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
+ assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
+ assert(MI->getDesc().getNumDefs() == 1 &&
+ "Coalescer can understand multiple defs?!");
+ const MachineOperand &MODef = MI->getOperand(0);
+ // Do not rewrite physical definitions.
+ if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ return false;
+
+ bool Changed = false;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
+ // If none exists, bail out.
+ if (!CpyRewriter)
+ return false;
+ // Rewrite each rewritable source.
+ unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
+ while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
+ TrackSubReg)) {
+ // Keep track of PHI nodes and their incoming edges when looking for sources.
+ RewriteMapTy RewriteMap;
+ // Try to find a more suitable source. If we fail to do so, or if we end up
+ // with the original source, move on to the next source.
+ if (!findNextSource(TrackReg, TrackSubReg, RewriteMap))
+ continue;
+
+ // Get the new source to rewrite. TODO: Only enable handling of multiple
+ // sources (PHIs) once we have a motivating example and testcases for it.
+ TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg);
+ TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource(
+ MRI, TII, TrackPair, RewriteMap, false /* multiple sources */);
+ if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0)
+ continue;
+
+ // Rewrite source.
+ if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
+ // We may have extended the live-range of NewSrc, account for that.
+ MRI->clearKillFlags(NewSrc.Reg);
+ Changed = true;
+ }
+ }
+ // TODO: We could have a clean-up method to tidy the instruction.
+ // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
+ // => v0 = COPY v1
+ // Currently we haven't seen a motivating example for that and we
+ // want to avoid untested code.
+ NumRewrittenCopies += Changed;
+ return Changed;
+}
+
+/// \brief Optimize copy-like instructions to create
+/// register coalescer friendly instruction.
+/// The optimization tries to kill off \p MI by looking
+/// through a chain of copies to find a source that has a compatible
+/// register class.
+/// If such a source is found, it replaces \p MI with a generic COPY
+/// operation.
+/// \pre isUncoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+/// All COPY instructions created are inserted in \p LocalMIs.
+bool PeepholeOptimizer::optimizeUncoalescableCopy(
+ MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+ assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
+
+ // Check if we can rewrite all the values defined by this instruction.
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs;
+ // Get the right rewriter for the current copy.
+ std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI));
+ // If none exists, bail out.
+ if (!CpyRewriter)
+ return false;
+
+ // Rewrite each rewritable source by generating new COPYs. This works
+ // differently from optimizeCoalescableCopy since it first makes sure that all
+ // definitions can be rewritten.
+ RewriteMapTy RewriteMap;
+ unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg;
+ while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg,
+ CopyDefSubReg)) {
+ // If a physical register is here, this is probably for a good reason.
+ // Do not rewrite that.
+ if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg))
+ return false;
+
+ // If we do not know how to rewrite this definition, there is no point
+ // in trying to kill this instruction.
+ TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg);
+ if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap))
+ return false;
+
+ RewritePairs.push_back(Def);
+ }
+
+ // The change is possible for all defs, do it.
+ for (const auto &Def : RewritePairs) {
+ // Rewrite the "copy" in a way the register coalescer understands.
+ MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap);
+ assert(NewCopy && "Should be able to always generate a new copy");
+ LocalMIs.insert(NewCopy);
+ }
+
+ // MI is now dead.
+ MI->eraseFromParent();
+ ++NumUncoalescableCopies;
+ return true;
+}
+
+/// Check whether MI is a candidate for folding into a later instruction.
+/// We only fold loads into virtual registers, and only when the defined
+/// virtual register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(
+ MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
+ // loads. It should be checked when processing uses of the load, since
+ // uses can be removed during the peephole pass.
+ if (!MI->getOperand(0).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MRI->hasOneNonDBGUse(Reg)) {
+ FoldAsLoadDefCandidates.insert(Reg);
+ return true;
+ }
+ return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(
+ MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MI->isMoveImmediate())
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// Try folding register operands that are defined by move immediate
+/// instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
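+///
+/// For example (illustrative pseudo-opcodes, not taken from this file):
+///   %v0 = MOVi 42
+///   %v1 = ADDrr %v2, %v0
+/// may be rewritten by the target's FoldImmediate hook into
+///   %v1 = ADDri %v2, 42
+/// when both instructions live in the same basic block.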
+bool PeepholeOptimizer::foldImmediate(
+ MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
+ if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
+// FIXME: This is very simple and misses some cases which should be handled when
+// motivating examples are found.
+//
+// The copy rewriting logic should look at uses as well as defs and be able to
+// eliminate copies across blocks.
+//
+// Later copies that are subregister extracts will also not be eliminated since
+// only the first copy is considered.
+//
+// e.g.
+// %vreg1 = COPY %vreg0
+// %vreg2 = COPY %vreg0:sub1
+//
+// Should replace %vreg2 uses with %vreg1:sub1
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs,
+ DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+ assert(MI->isCopy() && "expected a COPY machine instruction");
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ return false;
+
+ if (CopySrcRegs.insert(SrcReg).second) {
+ // First copy of this reg seen.
+ CopyMIs.insert(std::make_pair(SrcReg, MI));
+ return false;
+ }
+
+ MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
+
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+ unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
+
+ // Can't replace different subregister extracts.
+ if (SrcSubReg != PrevSrcSubReg)
+ return false;
+
+ unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
+
+ // Only replace if the copy register class is the same.
+ //
+ // TODO: If we have multiple copies to different register classes, we may want
+ // to track multiple copies of the same source register.
+ if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
+ return false;
+
+ MRI->replaceRegWith(DstReg, PrevDstReg);
+
+ // Lifetime of the previous copy has been extended.
+ MRI->clearKillFlags(PrevDstReg);
+ return true;
+}
+
+bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
+ return TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !MRI->isAllocatable(Reg);
+}
+
+bool PeepholeOptimizer::foldRedundantNAPhysCopy(
+ MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
+ assert(MI->isCopy() && "expected a COPY machine instruction");
+
+ if (DisableNAPhysCopyOpt)
+ return false;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // %vreg = COPY %PHYSREG
+ // Avoid using a datastructure which can track multiple live non-allocatable
+ // phys->virt copies since LLVM doesn't seem to do this.
+ NAPhysToVirtMIs.insert({SrcReg, MI});
+ return false;
+ }
+
+ if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ return false;
+
+ // %PHYSREG = COPY %vreg
+ auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
+ if (PrevCopy == NAPhysToVirtMIs.end()) {
+ // We can't remove the copy: there was an intervening clobber of the
+ // non-allocatable physical register after the copy to virtual.
+ DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI
+ << '\n');
+ return false;
+ }
+
+ unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+ if (PrevDstReg == SrcReg) {
+ // Remove the virt->phys copy: we saw the virtual register definition, and
+ // the non-allocatable physical register's state hasn't changed since then.
+ DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n');
+ ++NumNAPhysCopies;
+ return true;
+ }
+
+ // Potential missed optimization opportunity: we saw a different virtual
+ // register get a copy of the non-allocatable physical register, and we only
+ // track one such copy. Avoid getting confused by this new non-allocatable
+ // physical register definition, and remove it from the tracked copies.
+ DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n');
+ NAPhysToVirtMIs.erase(PrevCopy);
+ return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
+ DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
+
+ if (DisablePeephole)
+ return false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ bool SeenMoveImm = false;
+
+ // During this forward scan, at some point it needs to answer the question
+ // "given a pointer to an MI in the current BB, is it located before or
+ // after the current instruction".
+ // To perform this, the following set keeps track of the MIs already seen
+ // during the scan; if an MI is not in the set, it is assumed to be located
+ // after. Newly created MIs have to be inserted in the set as well.
+ SmallPtrSet<MachineInstr*, 16> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+ SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
+
+ // Track when a non-allocatable physical register is copied to a virtual
+ // register so that useless moves can be removed.
+ //
+ // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG`
+ // without any intervening re-definition of %PHYSREG.
+ DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
+
+ // Set of virtual registers that are copied from.
+ SmallSet<unsigned, 4> CopySrcRegs;
+ DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+
+ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
+ MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+ // We may be erasing MI below, increment MII now.
+ ++MII;
+ LocalMIs.insert(MI);
+
+ // Skip debug values. They should not affect this peephole optimization.
+ if (MI->isDebugValue())
+ continue;
+
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates.
+ if (MI->isLoadFoldBarrier())
+ FoldAsLoadDefCandidates.clear();
+
+ if (MI->isPosition() || MI->isPHI())
+ continue;
+
+ if (!MI->isCopy()) {
+ for (const auto &Op : MI->operands()) {
+ // Visit all operands: definitions can be implicit or explicit.
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ if (Op.isDef() && isNAPhysCopy(Reg)) {
+ const auto &Def = NAPhysToVirtMIs.find(Reg);
+ if (Def != NAPhysToVirtMIs.end()) {
+ // A new definition of the non-allocatable physical register
+ // invalidates previous copies.
+ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
+ << '\n');
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ } else if (Op.isRegMask()) {
+ const uint32_t *RegMask = Op.getRegMask();
+ for (auto &RegMI : NAPhysToVirtMIs) {
+ unsigned Def = RegMI.first;
+ if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
+ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI
+ << '\n');
+ NAPhysToVirtMIs.erase(Def);
+ }
+ }
+ }
+ }
+ }
+
+ if (MI->isImplicitDef() || MI->isKill())
+ continue;
+
+ if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
+        // Discard all knowledge of non-allocatable physical registers, since
+        // we no longer know what is correct.
+ //
+ // FIXME: handle explicit asm clobbers.
+ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
+ << '\n');
+ NAPhysToVirtMIs.clear();
+ continue;
+ }
+
+ if ((isUncoalescableCopy(*MI) &&
+ optimizeUncoalescableCopy(MI, LocalMIs)) ||
+ (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) ||
+ (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
+ // MI is deleted.
+ LocalMIs.erase(MI);
+ Changed = true;
+ continue;
+ }
+
+ if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
+ Changed = true;
+ continue;
+ }
+
+ if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
+ // MI is just rewritten.
+ Changed = true;
+ continue;
+ }
+
+ if (MI->isCopy() &&
+ (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) ||
+ foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) {
+ LocalMIs.erase(MI);
+ MI->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
+ } else {
+ Changed |= optimizeExtInstr(MI, &MBB, LocalMIs);
+ // optimizeExtInstr might have created new instructions after MI
+ // and before the already incremented MII. Adjust MII so that the
+ // next iteration sees the new instructions.
+ MII = MI;
+ ++MII;
+ if (SeenMoveImm)
+ Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs);
+ }
+
+ // Check whether MI is a load candidate for folding into a later
+ // instruction. If MI is not a candidate, check whether we can fold an
+ // earlier load into MI.
+ if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
+ !FoldAsLoadDefCandidates.empty()) {
+ const MCInstrDesc &MIDesc = MI->getDesc();
+ for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+ ++i) {
+ const MachineOperand &MOp = MI->getOperand(i);
+ if (!MOp.isReg())
+ continue;
+ unsigned FoldAsLoadDefReg = MOp.getReg();
+ if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
+ // We need to fold load after optimizeCmpInstr, since
+ // optimizeCmpInstr can enable folding by converting SUB to CMP.
+ // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
+ // we need it for markUsesInDebugValueAsUndef().
+ unsigned FoldedReg = FoldAsLoadDefReg;
+ MachineInstr *DefMI = nullptr;
+ if (MachineInstr *FoldMI =
+ TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
+ // Update LocalMIs since we replaced MI with FoldMI and deleted
+ // DefMI.
+ DEBUG(dbgs() << "Replacing: " << *MI);
+ DEBUG(dbgs() << " With: " << *FoldMI);
+ LocalMIs.erase(MI);
+ LocalMIs.erase(DefMI);
+ LocalMIs.insert(FoldMI);
+ MI->eraseFromParent();
+ DefMI->eraseFromParent();
+ MRI->markUsesInDebugValueAsUndef(FoldedReg);
+ FoldAsLoadDefCandidates.erase(FoldedReg);
+ ++NumLoadFold;
+ // MI is replaced with FoldMI.
+ Changed = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
+ assert(Def->isCopy() && "Invalid definition");
+  // Copy instructions are supposed to be: Def = Src.
+ // If someone breaks this assumption, bad things will happen everywhere.
+ assert(Def->getNumOperands() == 2 && "Invalid number of operands");
+
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+    // If we look for a different subreg, it means we want a subreg of the src.
+    // Bail out, as we do not support composing subregs yet.
+ return ValueTrackerResult();
+ // Otherwise, we want the whole source.
+ const MachineOperand &Src = Def->getOperand(1);
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
+ assert(Def->isBitcast() && "Invalid definition");
+
+ // Bail if there are effects that a plain copy will not expose.
+ if (Def->hasUnmodeledSideEffects())
+ return ValueTrackerResult();
+
+ // Bitcasts with more than one def are not supported.
+ if (Def->getDesc().getNumDefs() != 1)
+ return ValueTrackerResult();
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of the src.
+    // Bail out, as we do not support composing subregs yet.
+ return ValueTrackerResult();
+
+ unsigned SrcIdx = Def->getNumOperands();
+ for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = Def->getOperand(OpIdx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ // Ignore dead implicit defs.
+ if (MO.isImplicit() && MO.isDead())
+ continue;
+ assert(!MO.isDef() && "We should have skipped all the definitions by now");
+ if (SrcIdx != EndOpIdx)
+ // Multiple sources?
+ return ValueTrackerResult();
+ SrcIdx = OpIdx;
+ }
+ const MachineOperand &Src = Def->getOperand(SrcIdx);
+ return ValueTrackerResult(Src.getReg(), Src.getSubReg());
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
+ assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
+ "Invalid definition");
+
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subregs, bail out.
+ // The case we are checking is Def.<subreg> = REG_SEQUENCE.
+ // This should almost never happen as the SSA property is tracked at
+ // the register level (as opposed to the subreg level).
+ // I.e.,
+ // Def.sub0 =
+ // Def.sub1 =
+ // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
+ // Def. Thus, it must not be generated.
+    // However, some code could theoretically generate a single
+    // Def.sub0 (i.e., not defining the other subregs) and we would
+ // have this case.
+ // If we can ascertain (or force) that this never happens, we could
+ // turn that into an assertion.
+ return ValueTrackerResult();
+
+ if (!TII)
+ // We could handle the REG_SEQUENCE here, but we do not want to
+ // duplicate the code from the generic TII.
+ return ValueTrackerResult();
+
+ SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
+ if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
+ return ValueTrackerResult();
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ // Check if one of the operand defines the subreg we are interested in.
+ for (auto &RegSeqInput : RegSeqInputRegs) {
+ if (RegSeqInput.SubIdx == DefSubReg) {
+ if (RegSeqInput.SubReg)
+ // Bail if we have to compose sub registers.
+ return ValueTrackerResult();
+
+ return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
+ }
+ }
+
+ // If the subreg we are tracking is super-defined by another subreg,
+ // we could follow this value. However, this would require to compose
+ // the subreg and we do not do that for now.
+ return ValueTrackerResult();
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
+ assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
+ "Invalid definition");
+
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subreg, bail out.
+ // Same remark as getNextSourceFromRegSequence.
+ // I.e., this may be turned into an assert.
+ return ValueTrackerResult();
+
+ if (!TII)
+    // We could handle the INSERT_SUBREG here, but we do not want to
+    // duplicate the code from the generic TII.
+ return ValueTrackerResult();
+
+ TargetInstrInfo::RegSubRegPair BaseReg;
+ TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
+ if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
+ return ValueTrackerResult();
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub1
+ // There are two cases:
+ // 1. DefSubReg == sub1, get v1.
+ // 2. DefSubReg != sub1, the value may be available through v0.
+
+ // #1 Check if the inserted register matches the required sub index.
+ if (InsertedReg.SubIdx == DefSubReg) {
+ return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
+ }
+  // #2 Otherwise, if the sub register we are looking for is not partially
+  // defined by the inserted element, we can look through the main
+ // register (v0).
+ const MachineOperand &MODef = Def->getOperand(DefIdx);
+ // If the result register (Def) and the base register (v0) do not
+ // have the same register class or if we have to compose
+ // subregisters, bail out.
+ if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
+ BaseReg.SubReg)
+ return ValueTrackerResult();
+
+ // Get the TRI and check if the inserted sub-register overlaps with the
+ // sub-register we are tracking.
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ if (!TRI ||
+ (TRI->getSubRegIndexLaneMask(DefSubReg) &
+ TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
+ return ValueTrackerResult();
+ // At this point, the value is available in v0 via the same subreg
+ // we used for Def.
+ return ValueTrackerResult(BaseReg.Reg, DefSubReg);
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
+ assert((Def->isExtractSubreg() ||
+ Def->isExtractSubregLike()) && "Invalid definition");
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0, sub0
+
+ // Bail if we have to compose sub registers.
+ // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
+ if (DefSubReg)
+ return ValueTrackerResult();
+
+ if (!TII)
+ // We could handle the EXTRACT_SUBREG here, but we do not want to
+ // duplicate the code from the generic TII.
+ return ValueTrackerResult();
+
+ TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
+ if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
+ return ValueTrackerResult();
+
+ // Bail if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
+ if (ExtractSubregInputReg.SubReg)
+ return ValueTrackerResult();
+  // Otherwise, the value is available in v0.sub0.
+ return ValueTrackerResult(ExtractSubregInputReg.Reg,
+ ExtractSubregInputReg.SubIdx);
+}
+
+ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
+ assert(Def->isSubregToReg() && "Invalid definition");
+ // We are looking at:
+ // Def = SUBREG_TO_REG Imm, v0, sub0
+
+ // Bail if we have to compose sub registers.
+ // If DefSubReg != sub0, we would have to check that all the bits
+ // we track are included in sub0 and if yes, we would have to
+ // determine the right subreg in v0.
+ if (DefSubReg != Def->getOperand(3).getImm())
+ return ValueTrackerResult();
+ // Bail if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
+ if (Def->getOperand(2).getSubReg())
+ return ValueTrackerResult();
+
+ return ValueTrackerResult(Def->getOperand(2).getReg(),
+ Def->getOperand(3).getImm());
+}
+
+/// \brief Explore each PHI incoming operand and return its sources
+ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
+ assert(Def->isPHI() && "Invalid definition");
+ ValueTrackerResult Res;
+
+ // If we look for a different subreg, bail as we do not support composing
+ // subregs yet.
+ if (Def->getOperand(0).getSubReg() != DefSubReg)
+ return ValueTrackerResult();
+
+ // Return all register sources for PHI instructions.
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
+ auto &MO = Def->getOperand(i);
+ assert(MO.isReg() && "Invalid PHI instruction");
+ Res.addSource(MO.getReg(), MO.getSubReg());
+ }
+
+ return Res;
+}
+
+ValueTrackerResult ValueTracker::getNextSourceImpl() {
+ assert(Def && "This method needs a valid definition");
+
+ assert(((Def->getOperand(DefIdx).isDef() &&
+ (DefIdx < Def->getDesc().getNumDefs() ||
+ Def->getDesc().isVariadic())) ||
+ Def->getOperand(DefIdx).isImplicit()) &&
+ "Invalid DefIdx");
+ if (Def->isCopy())
+ return getNextSourceFromCopy();
+ if (Def->isBitcast())
+ return getNextSourceFromBitcast();
+ // All the remaining cases involve "complex" instructions.
+ // Bail if we did not ask for the advanced tracking.
+ if (!UseAdvancedTracking)
+ return ValueTrackerResult();
+ if (Def->isRegSequence() || Def->isRegSequenceLike())
+ return getNextSourceFromRegSequence();
+ if (Def->isInsertSubreg() || Def->isInsertSubregLike())
+ return getNextSourceFromInsertSubreg();
+ if (Def->isExtractSubreg() || Def->isExtractSubregLike())
+ return getNextSourceFromExtractSubreg();
+ if (Def->isSubregToReg())
+ return getNextSourceFromSubregToReg();
+ if (Def->isPHI())
+ return getNextSourceFromPHI();
+ return ValueTrackerResult();
+}
+
+ValueTrackerResult ValueTracker::getNextSource() {
+ // If we reach a point where we cannot move up in the use-def chain,
+ // there is nothing we can get.
+ if (!Def)
+ return ValueTrackerResult();
+
+ ValueTrackerResult Res = getNextSourceImpl();
+ if (Res.isValid()) {
+ // Update definition, definition index, and subregister for the
+ // next call of getNextSource.
+ // Update the current register.
+ bool OneRegSrc = Res.getNumSources() == 1;
+ if (OneRegSrc)
+ Reg = Res.getSrcReg(0);
+ // Update the result before moving up in the use-def chain
+ // with the instruction containing the last found sources.
+ Res.setInst(Def);
+
+ // If we can still move up in the use-def chain, move to the next
+ // definition.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
+ Def = MRI.getVRegDef(Reg);
+ DefIdx = MRI.def_begin(Reg).getOperandNo();
+ DefSubReg = Res.getSrcSubReg(0);
+ return Res;
+ }
+ }
+ // If we end up here, this means we will not be able to find another source
+ // for the next iteration. Make sure any new call to getNextSource bails out
+ // early by cutting the use-def chain.
+ Def = nullptr;
+ return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
new file mode 100644
index 000000000000..5bc5f7524dbf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -0,0 +1,98 @@
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This runs the hazard recognizer and emits noops when necessary. This
+/// gives targets a way to run the hazard recognizer without running one of
+/// the schedulers. Example use cases for this pass would be:
+///
+/// - Targets that need the hazard recognizer to be run at -O0.
+/// - Targets that want to guarantee that hazards at the beginning of
+/// scheduling regions are handled correctly. The post-RA scheduler is
+/// a top-down scheduler, but when there are multiple scheduling regions
+/// in a basic block, it visits the regions in bottom-up order. This
+///    makes it impossible for the scheduler to guarantee it can correctly
+/// handle hazards at the beginning of scheduling regions.
+///
+/// This pass traverses all the instructions in a program in top-down order.
+/// In contrast to the instruction scheduling passes, this pass never resets
+/// the hazard recognizer, to ensure it can correctly handle noop hazards at
+/// the beginning of blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-hazard-rec"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+
+namespace {
+ class PostRAHazardRecognizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ PostRAHazardRecognizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ };
+ char PostRAHazardRecognizer::ID = 0;
+
+}
+
+char &llvm::PostRAHazardRecognizerID = PostRAHazardRecognizer::ID;
+
+INITIALIZE_PASS(PostRAHazardRecognizer, DEBUG_TYPE,
+ "Post RA hazard recognizer", false, false)
+
+bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ std::unique_ptr<ScheduleHazardRecognizer> HazardRec(
+ TII->CreateTargetPostRAHazardRecognizer(Fn));
+
+ // Return if the target has not implemented a hazard recognizer.
+ if (!HazardRec.get())
+ return false;
+
+ // Loop over all of the basic blocks
+ for (auto &MBB : Fn) {
+ // We do not call HazardRec->reset() here to make sure we are handling noop
+ // hazards at the start of basic blocks.
+ for (MachineInstr &MI : MBB) {
+ // If we need to emit noops prior to this instruction, then do so.
+ unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
+ for (unsigned i = 0; i != NumPreNoops; ++i) {
+ HazardRec->EmitNoop();
+ TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
+ ++NumNoops;
+ }
+
+ HazardRec->EmitInstruction(&MI);
+ if (HazardRec->atIssueLimit()) {
+ HazardRec->AdvanceCycle();
+ }
+ }
+ }
+ return true;
+}
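[Editorial illustration, not part of the patch.] The file comment above explains why the recognizer state is deliberately carried across basic-block boundaries. The sketch below mirrors that loop with an invented ToyHazardRec standing in for ScheduleHazardRecognizer, assuming a one-idle-cycle-after-load rule purely for illustration: noops are emitted before any instruction the model flags, and the state is never reset between blocks.

#include <cstdio>
#include <string>
#include <vector>

struct ToyHazardRec {
  bool LastWasLoad = false;
  // Assumed rule for the example: one idle cycle is required after a load.
  unsigned preEmitNoops(const std::string &Op) const {
    return (LastWasLoad && Op != "nop") ? 1 : 0;
  }
  void emitInstruction(const std::string &Op) { LastWasLoad = (Op == "load"); }
  void emitNoop() { LastWasLoad = false; }
};

static std::vector<std::string>
insertNoops(const std::vector<std::vector<std::string>> &Blocks) {
  ToyHazardRec HR;                  // deliberately not reset per block
  std::vector<std::string> Out;
  for (const auto &MBB : Blocks) {
    for (const std::string &Op : MBB) {
      // Emit any noops required before this instruction, then issue it.
      for (unsigned I = 0, N = HR.preEmitNoops(Op); I != N; ++I) {
        HR.emitNoop();
        Out.push_back("nop");
      }
      HR.emitInstruction(Op);
      Out.push_back(Op);
    }
  }
  return Out;
}

int main() {
  // The load ends the first block; the hazard still forces a noop at the start
  // of the next block because the recognizer state is carried across.
  for (const std::string &Op : insertNoops({{"add", "load"}, {"mul", "add"}}))
    std::printf("%s\n", Op.c_str());
}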
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 000000000000..3fce307f3dd4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,705 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+ cl::desc("Enable scheduling after register allocation"),
+ cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+ cl::desc("Break post-RA scheduling anti-dependencies: "
+ "\"critical\", \"all\", or \"none\""),
+ cl::init("none"), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+ cl::desc("Debug control MBBs that are scheduled"),
+ cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+ class PostRAScheduler : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ public:
+ static char ID;
+ PostRAScheduler() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ private:
+ bool enablePostRAScheduler(
+ const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
+ };
+ char PostRAScheduler::ID = 0;
+
+ class SchedulePostRATDList : public ScheduleDAGInstrs {
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ LatencyPriorityQueue AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+    /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+ AntiDepBreaker *AntiDepBreak;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+ /// The index in BB of RegionEnd.
+ ///
+ /// This is the instruction number from the top of the current block, not
+ /// the SlotIndex. It is only used by the AntiDepBreaker.
+ unsigned EndIndex;
+
+ public:
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs);
+
+ ~SchedulePostRATDList() override;
+
+ /// startBlock - Initialize register live-range state for scheduling in
+ /// this block.
+ ///
+ void startBlock(MachineBasicBlock *BB) override;
+
+ // Set the index of RegionEnd within the current BB.
+ void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
+
+ /// Initialize the scheduler state for the next scheduling region.
+ void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs) override;
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ void exitRegion() override;
+
+ /// Schedule - Schedule the instruction range using list scheduling.
+ ///
+ void schedule() override;
+
+ void EmitSchedule();
+
+ /// Observe - Update liveness information to account for the current
+ /// instruction, which will not be scheduled.
+ ///
+ void Observe(MachineInstr &MI, unsigned Count);
+
+ /// finishBlock - Clean up register live-range state.
+ ///
+ void finishBlock() override;
+
+ private:
+ /// Apply each ScheduleDAGMutation step in order.
+ void postprocessDAG();
+
+ void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void ReleaseSuccessors(SUnit *SU);
+ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void ListScheduleTopDown();
+
+ void dumpSchedule() const;
+ void emitNoop(unsigned CurCycle);
+ };
+}
+
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+ "Post RA top-down list latency scheduler", false, false)
+
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) {
+
+ const InstrItineraryData *InstrItins =
+ MF.getSubtarget().getInstrItineraryData();
+ HazardRec =
+ MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer(
+ InstrItins, this);
+ MF.getSubtarget().getPostRAMutations(Mutations);
+
+ assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE ||
+ MRI.tracksLiveness()) &&
+ "Live-ins must be accurate for anti-dependency breaking");
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+bool PostRAScheduler::enablePostRAScheduler(
+ const TargetSubtargetInfo &ST,
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
+ Mode = ST.getAntiDepBreakMode();
+ ST.getCriticalPathRCs(CriticalPathRCs);
+
+ // Check for explicit enable/disable of post-ra scheduling.
+ if (EnablePostRAScheduler.getPosition() > 0)
+ return EnablePostRAScheduler;
+
+ return ST.enablePostRAScheduler() &&
+ OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(*Fn.getFunction()))
+ return false;
+
+ TII = Fn.getSubtarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
+ RegClassInfo.runOnMachineFunction(Fn);
+
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
+
+ // Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
+ if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(),
+ AntiDepMode, CriticalPathRCs))
+ return false;
+
+ // Check for antidep breaking override...
+ if (EnableAntiDepBreaking.getPosition() > 0) {
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
+ }
+
+ DEBUG(dbgs() << "PostRAScheduler\n");
+
+ SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode,
+ CriticalPathRCs);
+
+ // Loop over all of the basic blocks
+ for (auto &MBB : Fn) {
+#ifndef NDEBUG
+ // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+ if (DebugDiv > 0) {
+ static int bbcnt = 0;
+ if (bbcnt++ % DebugDiv != DebugMod)
+ continue;
+ dbgs() << "*** DEBUG scheduling " << Fn.getName()
+ << ":BB#" << MBB.getNumber() << " ***\n";
+ }
+#endif
+
+ // Initialize register live-range state for scheduling in this block.
+ Scheduler.startBlock(&MBB);
+
+ // Schedule each sequence of instructions not interrupted by a label
+ // or anything else that effectively needs to shut down scheduling.
+ MachineBasicBlock::iterator Current = MBB.end();
+ unsigned Count = MBB.size(), CurrentCount = Count;
+ for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) {
+ MachineInstr &MI = *std::prev(I);
+ --Count;
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) {
+ Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count);
+ Scheduler.setEndIndex(CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+ Current = &MI;
+ CurrentCount = Count;
+ Scheduler.Observe(MI, CurrentCount);
+ }
+ I = MI;
+ if (MI.isBundle())
+ Count -= MI.getBundleSize();
+ }
+ assert(Count == 0 && "Instruction count mismatch!");
+ assert((MBB.begin() == Current || CurrentCount != 0) &&
+ "Instruction count mismatch!");
+ Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount);
+ Scheduler.setEndIndex(CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
+ Scheduler.EmitSchedule();
+
+ // Clean up register live-range state.
+ Scheduler.finishBlock();
+
+ // Update register kills
+ Scheduler.fixupKills(&MBB);
+ }
+
+ return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
+ // Call the superclass.
+ ScheduleDAGInstrs::startBlock(BB);
+
+ // Reset the hazard recognizer and anti-dep breaker.
+ HazardRec->Reset();
+ if (AntiDepBreak)
+ AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(AA);
+
+ if (AntiDepBreak) {
+ unsigned Broken =
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
+
+ if (Broken != 0) {
+ // We made changes. Update the dependency graph.
+ // Theoretically we could update the graph in place:
+ // When a live range is changed to use a different register, remove
+ // the def's anti-dependence *and* output-dependence edges due to
+ // that register, and add new anti-dependence and output-dependence
+ // edges based on the next live range of the register.
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
+
+ NumFixedAnti += Broken;
+ }
+ }
+
+ postprocessDAG();
+
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(
+ for (const SUnit &SU : SUnits) {
+ SU.dumpAll(this);
+ dbgs() << '\n';
+ }
+ );
+
+ AvailableQueue.initNodes(SUnits);
+ ListScheduleTopDown();
+ AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
+///
+void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) {
+ if (AntiDepBreak)
+ AntiDepBreak->Observe(MI, Count, EndIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::finishBlock() {
+ if (AntiDepBreak)
+ AntiDepBreak->FinishBlock();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void SchedulePostRATDList::postprocessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ // Standard scheduler algorithms will recompute the depth of the successor
+ // here as such:
+ // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+ //
+ // However, we lazily compute node depth instead. Note that
+ // ScheduleNodeTopDown has already updated the depth of this node which causes
+  // all descendants to be marked dirty. Setting the successor depth explicitly
+ // here would cause depth to be recomputed for all its ancestors. If the
+ // successor is not yet ready (because of a transitively redundant edge) then
+ // this causes depth computation to be quadratic in the size of the DAG.
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ ReleaseSucc(SU, &*I);
+ }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() &&
+ "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ ReleaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue.scheduledNode(SU);
+}
+
+/// emitNoop - Add a noop to the current instruction sequence.
+void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+ HazardRec->EmitNoop();
+ Sequence.push_back(nullptr); // NULL here means noop
+ ++NumNoops;
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // We're scheduling top-down but we're visiting the regions in
+ // bottom-up order, so we don't know the hazards at the start of a
+ // region. So assume no hazards (this should usually be ok as most
+ // blocks are a single region).
+ HazardRec->Reset();
+
+ // Release any successors of the special Entry node.
+ ReleaseSuccessors(&EntrySU);
+
+ // Add all leaves to Available queue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) {
+ AvailableQueue.push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // In any cycle where we can't schedule any instructions, we must
+ // stall or emit a noop, depending on the target.
+ bool CycleHasInsts = false;
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ unsigned MinDepth = ~0u;
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() <= CurCycle) {
+ AvailableQueue.push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ } else if (PendingQueue[i]->getDepth() < MinDepth)
+ MinDepth = PendingQueue[i]->getDepth();
+ }
+
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+ SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
+ bool HasNoopHazards = false;
+ while (!AvailableQueue.empty()) {
+ SUnit *CurSUnit = AvailableQueue.pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ if (HazardRec->ShouldPreferAnother(CurSUnit)) {
+ if (!NotPreferredSUnit) {
+ // If this is the first non-preferred node for this cycle, then
+ // record it and continue searching for a preferred node. If this
+ // is not the first non-preferred node, then treat it as though
+ // there had been a hazard.
+ NotPreferredSUnit = CurSUnit;
+ continue;
+ }
+ } else {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // If we have a non-preferred node, push it back onto the available list.
+ // If we did not find a preferred node, then schedule this first
+ // non-preferred node.
+ if (NotPreferredSUnit) {
+ if (!FoundSUnit) {
+ DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
+ FoundSUnit = NotPreferredSUnit;
+ } else {
+ AvailableQueue.push(NotPreferredSUnit);
+ }
+
+ NotPreferredSUnit = nullptr;
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue.push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule...
+ if (FoundSUnit) {
+ // If we need to emit noops prior to this instruction, then do so.
+ unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
+ for (unsigned i = 0; i != NumPreNoops; ++i)
+ emitNoop(CurCycle);
+
+ // ... schedule the node...
+ ScheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+ CycleHasInsts = true;
+ if (HazardRec->atIssueLimit()) {
+ DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ } else {
+ if (CycleHasInsts) {
+ DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem,
+ // just advance the current cycle and try again.
+ DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ emitNoop(CurCycle);
+ }
+
+ ++CurCycle;
+ CycleHasInsts = false;
+ }
+ }
+
+#ifndef NDEBUG
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = std::prev(RegionEnd);
+ }
+
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
+ MachineInstr *DbgValue = P.first;
+    MachineBasicBlock::iterator OrigPrevMI = P.second;
+    BB->splice(++OrigPrevMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = nullptr;
+}
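[Editorial illustration, not part of the patch.] As a rough sketch of the list-scheduling loop implemented by ListScheduleTopDown, minus hazard recognition, anti-dependency breaking, and the LatencyPriorityQueue heuristics, the standalone code below schedules an invented dependence DAG top-down: nodes whose predecessor counts reach zero move to a pending list, become available once their ready cycle is reached, and the clock advances when nothing can issue (a stall). The Node type and latencies are made up for the example.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Node {
  std::vector<int> Succs;   // successor indices
  unsigned Latency = 1;     // cycles before successors may start
  unsigned NumPredsLeft = 0;
  unsigned ReadyCycle = 0;
};

static std::vector<int> listScheduleTopDown(std::vector<Node> &DAG) {
  std::vector<int> Sequence, Pending, Available;
  for (int I = 0, E = (int)DAG.size(); I != E; ++I)
    if (DAG[I].NumPredsLeft == 0)
      Available.push_back(I);       // leaves are immediately schedulable

  unsigned CurCycle = 0;
  while (!Available.empty() || !Pending.empty()) {
    // Move pending nodes whose operands are now ready.
    for (size_t I = 0; I != Pending.size();) {
      if (DAG[Pending[I]].ReadyCycle <= CurCycle) {
        Available.push_back(Pending[I]);
        Pending[I] = Pending.back();
        Pending.pop_back();
      } else
        ++I;
    }
    if (Available.empty()) { ++CurCycle; continue; } // pipeline stall
    int SU = Available.back();
    Available.pop_back();
    Sequence.push_back(SU);
    // Release successors: decrement pred counts, record earliest start cycle.
    for (int Succ : DAG[SU].Succs) {
      Node &S = DAG[Succ];
      S.ReadyCycle = std::max(S.ReadyCycle, CurCycle + DAG[SU].Latency);
      if (--S.NumPredsLeft == 0)
        Pending.push_back(Succ);
    }
    ++CurCycle;
  }
  return Sequence;
}

int main() {
  // Diamond DAG: 0 -> {1, 2} -> 3, where node 1 has a two-cycle latency.
  std::vector<Node> DAG(4);
  DAG[0].Succs = {1, 2};
  DAG[1].Succs = {3}; DAG[1].Latency = 2; DAG[1].NumPredsLeft = 1;
  DAG[2].Succs = {3}; DAG[2].NumPredsLeft = 1;
  DAG[3].NumPredsLeft = 2;
  for (int SU : listScheduleTopDown(DAG))
    std::printf("SU%d\n", SU);
}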
diff --git a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
new file mode 100644
index 000000000000..fbc2bc64f425
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -0,0 +1,94 @@
+//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR lowering for the llvm.load.relative intrinsic.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+bool lowerLoadRelative(Function &F) {
+ if (F.use_empty())
+ return false;
+
+ bool Changed = false;
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Type *Int32PtrTy = Int32Ty->getPointerTo();
+ Type *Int8Ty = Type::getInt8Ty(F.getContext());
+
+ for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
+ auto CI = dyn_cast<CallInst>(I->getUser());
+ ++I;
+ if (!CI || CI->getCalledValue() != &F)
+ continue;
+
+ IRBuilder<> B(CI);
+ Value *OffsetPtr =
+ B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
+ Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
+ Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4);
+
+ Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
+
+ CI->replaceAllUsesWith(ResultPtr);
+ CI->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool lowerIntrinsics(Module &M) {
+ bool Changed = false;
+ for (Function &F : M) {
+ if (F.getName().startswith("llvm.load.relative."))
+ Changed |= lowerLoadRelative(F);
+ }
+ return Changed;
+}
+
+class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
+public:
+ static char ID;
+ PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) { return lowerIntrinsics(M); }
+};
+
+char PreISelIntrinsicLoweringLegacyPass::ID;
+}
+
+INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass,
+ "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering",
+ false, false)
+
+namespace llvm {
+ModulePass *createPreISelIntrinsicLoweringPass() {
+ return new PreISelIntrinsicLoweringLegacyPass;
+}
+
+PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!lowerIntrinsics(M))
+ return PreservedAnalyses::all();
+ else
+ return PreservedAnalyses::none();
+}
+} // End llvm namespace
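[Editorial illustration, not part of the patch.] lowerLoadRelative above rewrites each llvm.load.relative call into a GEP, a 4-byte aligned i32 load, and a second GEP. The plain C++ sketch below, with invented names and operating on an ordinary byte buffer, shows the arithmetic those three instructions perform, which may make the intent of the generated IR easier to follow.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Equivalent of: %off = load i32 at (%base + %offset); result = %base + %off
static const uint8_t *loadRelative(const uint8_t *Base, int64_t Offset) {
  int32_t Off;
  std::memcpy(&Off, Base + Offset, sizeof(Off)); // the 4-byte aligned load
  return Base + Off;                             // GEP of the loaded offset
}

int main() {
  // A tiny "relative table": entry 0 stores the distance to the payload byte.
  alignas(4) uint8_t Buf[8] = {0};
  int32_t Delta = 6;
  std::memcpy(Buf, &Delta, sizeof(Delta));
  Buf[6] = 42;
  std::printf("%d\n", (int)*loadRelative(Buf, 0)); // prints 42
}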
diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
new file mode 100644
index 000000000000..d27ea2f51867
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -0,0 +1,168 @@
+//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "processimplicitdefs"
+
+namespace {
+/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
+/// for each use. Add isUndef marker to implicit_def defs and their uses.
+class ProcessImplicitDefs : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ SmallSetVector<MachineInstr*, 16> WorkList;
+
+ void processImplicitDef(MachineInstr *MI);
+ bool canTurnIntoImplicitDef(MachineInstr *MI);
+
+public:
+ static char ID;
+
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {
+ initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &au) const override;
+
+ bool runOnMachineFunction(MachineFunction &fn) override;
+};
+} // end anonymous namespace
+
+char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+
+void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
+ return false;
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isUse() && MO.readsReg())
+ return false;
+ return true;
+}
+
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ DEBUG(dbgs() << "Processing " << *MI);
+ unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // For virtual registers, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
+ continue;
+ DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI);
+ UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ WorkList.insert(UserMI);
+ }
+ MI->eraseFromParent();
+ return;
+ }
+
+ // This is a physreg implicit-def.
+ // Look for the first instruction to use or define an alias.
+ MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
+ MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ bool Found = false;
+ for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (MachineOperand &MO : UserMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned UserReg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ !TRI->regsOverlap(Reg, UserReg))
+ continue;
+ // UserMI uses or redefines Reg. Set <undef> flags on all uses.
+ Found = true;
+ if (MO.isUse())
+ MO.setIsUndef();
+ }
+ if (Found)
+ break;
+ }
+
+ // If we found the using MI, we can erase the IMPLICIT_DEF.
+ if (Found) {
+ DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ MI->eraseFromParent();
+ return;
+ }
+
+  // The using instruction wasn't found; it could be in another block.
+ // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->RemoveOperand(i);
+ DEBUG(dbgs() << "Keeping physreg: " << *MI);
+}
+
+/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into
+/// <undef> operands.
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
+
+ DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+
+ bool Changed = false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
+ assert(WorkList.empty() && "Inconsistent worklist state");
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE; ++MFI) {
+ // Scan the basic block for implicit defs.
+ for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI)
+ if (MBBI->isImplicitDef())
+ WorkList.insert(&*MBBI);
+
+ if (WorkList.empty())
+ continue;
+
+ DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size()
+ << " implicit defs.\n");
+ Changed = true;
+
+ // Drain the WorkList to recursively process any new implicit defs.
+ do processImplicitDef(WorkList.pop_back_val());
+ while (!WorkList.empty());
+ }
+ return Changed;
+}
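[Editorial illustration, not part of the patch.] The worklist in runOnMachineFunction drains recursively: converting a copy-like user into IMPLICIT_DEF can expose further users that also become implicit defs. The sketch below reproduces only that propagation shape on an invented value graph; it is not the MachineInstr-based logic, just an illustration of why newly converted users are pushed back onto the worklist.

#include <cstdio>
#include <vector>

struct Value {
  std::vector<int> CopyLikeUsers; // indices of values copied from this one
  bool Undef = false;
};

static void propagateImplicitDefs(std::vector<Value> &Values,
                                  std::vector<int> Worklist) {
  while (!Worklist.empty()) {
    int V = Worklist.back();
    Worklist.pop_back();
    for (int User : Values[V].CopyLikeUsers) {
      if (Values[User].Undef)
        continue;                  // already converted
      Values[User].Undef = true;   // becomes an implicit def itself
      Worklist.push_back(User);    // process the new implicit def recursively
    }
  }
}

int main() {
  // Value 0 is an implicit def; 1 copies 0; 2 copies 1; 3 is unrelated.
  std::vector<Value> Values(4);
  Values[0].CopyLikeUsers = {1};
  Values[1].CopyLikeUsers = {2};
  Values[0].Undef = true;
  propagateImplicitDefs(Values, {0});
  for (int I = 0; I != 4; ++I)
    std::printf("value %d undef: %d\n", I, (int)Values[I].Undef);
}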
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 000000000000..20a9a394ebd0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,1234 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pei"
+
+typedef SmallVector<MachineBasicBlock *, 4> MBBVector;
+static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
+ unsigned &MinCSFrameIndex,
+                                   unsigned &MaxCSFrameIndex,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks);
+
+static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS);
+
+namespace {
+class PEI : public MachineFunctionPass {
+public:
+ static char ID;
+ explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+
+ if (TM && (!TM->usesPhysRegsForPEI())) {
+ SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
+ unsigned &, unsigned &, const MBBVector &,
+ const MBBVector &) {};
+ ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
+ } else {
+ SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
+ ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
+ UsesCalleeSaves = true;
+ }
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ MachineFunctionProperties MFP;
+ if (UsesCalleeSaves)
+ MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated);
+ return MFP;
+ }
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ std::function<void(MachineFunction &MF, RegScavenger *RS,
+ unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks)>
+ SpillCalleeSavedRegisters;
+ std::function<void(MachineFunction &MF, RegScavenger *RS)>
+ ScavengeFrameVirtualRegs;
+
+ bool UsesCalleeSaves = false;
+
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max();
+ unsigned MaxCSFrameIndex = 0;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ MBBVector SaveBlocks;
+ MBBVector RestoreBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ void calculateCallFrameInfo(MachineFunction &Fn);
+ void calculateSaveRestoreBlocks(MachineFunction &Fn);
+
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+};
+} // namespace
+
+char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
+
+static cl::opt<unsigned>
+WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
+ cl::desc("Warn for stack size bigger than the given"
+ " number"));
+
+INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_TM_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
+
+MachineFunctionPass *
+llvm::createPrologEpilogInserterPass(const TargetMachine *TM) {
+ return new PEI(TM);
+}
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<StackProtector>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool PEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallFrameInfo(Fn);
+
+ // Determine placement of CSR spill/restore code and prolog/epilog code:
+ // place all spills in the entry block, all restores in return blocks.
+ calculateSaveRestoreBlocks(Fn);
+
+ // Handle CSR spilling and restoring, for targets that need it.
+ SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
+ SaveBlocks, RestoreBlocks);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+  // called functions. Because of this, calculateCallFrameInfo() must be
+  // called before this function in order to set the AdjustsStack and
+  // MaxCallFrameSize variables.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+  // If register scavenging is needed and we've enabled doing it as a
+  // post-pass, scavenge the virtual registers that frame index elimination
+  // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+ ScavengeFrameVirtualRegs(Fn, RS);
+
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+ }
+
+  // Warn on stack size when it exceeds the given limit.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ uint64_t StackSize = MFI->getStackSize();
+ if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+ DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+ F->getContext().diagnose(DiagStackSize);
+ }
+
+ delete RS;
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ MFI->setSavePoint(nullptr);
+ MFI->setRestorePoint(nullptr);
+ return true;
+}
+
+/// Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+  // Get the function call frame set-up and tear-down instruction opcodes.
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+        // Some inline asms need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
+
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
+ const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI->getSavePoint()) {
+ SaveBlocks.push_back(MFI->getSavePoint());
+ assert(MFI->getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&Fn.front());
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+static void assignCalleeSavedSpillSlots(MachineFunction &F,
+ const BitVector &SavedRegs,
+ unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex) {
+ if (SavedRegs.empty())
+ return;
+
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg))
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
+
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+    // If the target doesn't implement this, use generic code.
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (auto &CS : CSI) {
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ CS.setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ }
+
+ CS.setFrameIdx(FrameIdx);
+ }
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
+
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Visited will contain all the basic blocks that are in the region
+ // where the callee saved registers are alive:
+ // - Anything that is not Save or Restore -> LiveThrough.
+ // - Save -> LiveIn.
+ // - Restore -> LiveOut.
+ // The live-out is not attached to the block, so no need to keep
+ // Restore in this set.
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ SmallVector<MachineBasicBlock *, 8> WorkList;
+ MachineBasicBlock *Entry = &MF.front();
+ MachineBasicBlock *Save = MFI->getSavePoint();
+
+ if (!Save)
+ Save = Entry;
+
+ if (Entry != Save) {
+ WorkList.push_back(Entry);
+ Visited.insert(Entry);
+ }
+ Visited.insert(Save);
+
+ MachineBasicBlock *Restore = MFI->getRestorePoint();
+ if (Restore)
+ // By construction Restore cannot be visited, otherwise it
+ // means there exists a path to Restore that does not go
+ // through Save.
+ WorkList.push_back(Restore);
+
+ while (!WorkList.empty()) {
+ const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+ // By construction, the region that is after the save point is
+ // dominated by the Save and post-dominated by the Restore.
+ if (CurBB == Save && Save != Restore)
+ continue;
+ // Enqueue all the successors not already visited.
+ // Those are by construction either before Save or after Restore.
+ for (MachineBasicBlock *SuccBB : CurBB->successors())
+ if (Visited.insert(SuccBB).second)
+ WorkList.push_back(SuccBB);
+ }
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
+ }
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function.
+///
+static void insertCSRSpillsAndRestores(MachineFunction &Fn,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ // Spill using target interface.
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ I = SaveBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(Fn);
+ }
+
+ // Restore using target interface.
+ for (MachineBasicBlock *MBB : RestoreBlocks) {
+ I = MBB->end();
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+}
+
+static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
+ unsigned &MinCSFrameIndex,
+ unsigned &MaxCSFrameIndex,
+ const MBBVector &SaveBlocks,
+ const MBBVector &RestoreBlocks) {
+ const Function *F = Fn.getFunction();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ MinCSFrameIndex = std::numeric_limits<unsigned>::max();
+ MaxCSFrameIndex = 0;
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+
+ // Assign stack slots for any callee-saved registers that must be spilled.
+ assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn, SaveBlocks, RestoreBlocks);
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign, unsigned Skew) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = alignTo(Offset, Align, Skew);
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
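To make the arithmetic above concrete, here is a standalone sketch, separate from the patch itself, that replays the down-growing-stack branch of AdjustStackOffset on made-up object sizes and alignments; the local alignTo helper is assumed to match the behavior of llvm::alignTo, including the Skew parameter.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Assumed equivalent of llvm::alignTo(Value, Align, Skew).
static uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
  assert(Align != 0 && "Align can't be 0.");
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

int main() {
  int64_t Offset = 16;    // pretend the fixed/callee-save area already uses 16 bytes
  unsigned MaxAlign = 8;
  const unsigned Skew = 0;

  // Hypothetical objects: {size, alignment}.
  struct Obj { int64_t Size; unsigned Align; };
  const Obj Objects[] = {{4, 4}, {8, 8}, {1, 16}};

  for (const Obj &O : Objects) {
    Offset += O.Size;                        // stack grows down: step past the object
    MaxAlign = std::max(MaxAlign, O.Align);  // the frame may need a larger alignment
    Offset = alignTo(Offset, O.Align, Skew); // snap to the object's alignment
    std::printf("size %lld align %u -> SP[%lld]\n",
                (long long)O.Size, O.Align, (long long)-Offset);
  }
  std::printf("running offset %lld, MaxAlign %u\n", (long long)Offset, MaxAlign);
  return 0;
}

With these numbers the three objects land at SP[-20], SP[-32] and SP[-48], and the running offset ends at 48 with MaxAlign raised to 16.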
+
+/// Compute which bytes of fixed and callee-save stack area are unused and keep
+/// track of them in StackBytesFree.
+///
+static inline void
+computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
+ unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
+ int64_t FixedCSEnd, BitVector &StackBytesFree) {
+ // Avoid undefined int64_t -> int conversion below in extreme case.
+ if (FixedCSEnd > std::numeric_limits<int>::max())
+ return;
+
+ StackBytesFree.resize(FixedCSEnd, true);
+
+ SmallVector<int, 16> AllocatedFrameSlots;
+ // Add fixed objects.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
+ AllocatedFrameSlots.push_back(i);
+ // Add callee-save objects.
+ for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
+ AllocatedFrameSlots.push_back(i);
+
+ for (int i : AllocatedFrameSlots) {
+ // These are converted from int64_t, but they should always fit in int
+ // because of the FixedCSEnd check above.
+ int ObjOffset = MFI->getObjectOffset(i);
+ int ObjSize = MFI->getObjectSize(i);
+ int ObjStart, ObjEnd;
+ if (StackGrowsDown) {
+ // ObjOffset is negative when StackGrowsDown is true.
+ ObjStart = -ObjOffset - ObjSize;
+ ObjEnd = -ObjOffset;
+ } else {
+ ObjStart = ObjOffset;
+ ObjEnd = ObjOffset + ObjSize;
+ }
+ // Ignore fixed holes that are in the previous stack frame.
+ if (ObjEnd > 0)
+ StackBytesFree.reset(ObjStart, ObjEnd);
+ }
+}
+
+/// Assign frame object to an unused portion of the stack in the fixed stack
+/// object range. Return true if the allocation was successful.
+///
+static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, unsigned MaxAlign,
+ BitVector &StackBytesFree) {
+ if (MFI->isVariableSizedObjectIndex(FrameIdx))
+ return false;
+
+ if (StackBytesFree.none()) {
+    // Clear it to speed up later scavengeStackSlot calls to
+    // StackBytesFree.none().
+ StackBytesFree.clear();
+ return false;
+ }
+
+ unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx);
+ if (ObjAlign > MaxAlign)
+ return false;
+
+ int64_t ObjSize = MFI->getObjectSize(FrameIdx);
+ int FreeStart;
+ for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
+ FreeStart = StackBytesFree.find_next(FreeStart)) {
+
+ // Check that free space has suitable alignment.
+ unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart;
+ if (alignTo(ObjStart, ObjAlign) != ObjStart)
+ continue;
+
+ if (FreeStart + ObjSize > StackBytesFree.size())
+ return false;
+
+ bool AllBytesFree = true;
+ for (unsigned Byte = 0; Byte < ObjSize; ++Byte)
+ if (!StackBytesFree.test(FreeStart + Byte)) {
+ AllBytesFree = false;
+ break;
+ }
+ if (AllBytesFree)
+ break;
+ }
+
+ if (FreeStart == -1)
+ return false;
+
+ if (StackGrowsDown) {
+ int ObjStart = -(FreeStart + ObjSize);
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart
+ << "]\n");
+ MFI->setObjectOffset(FrameIdx, ObjStart);
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart
+ << "]\n");
+ MFI->setObjectOffset(FrameIdx, FreeStart);
+ }
+
+ StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
+ return true;
+}
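The hole search in scavengeStackSlot can also be read in isolation. Below is a simplified, self-contained version, separate from the patch, with std::vector<bool> standing in for llvm::BitVector and no alignment skew; it returns the object offset the function would assign, or a sentinel when no suitable hole exists.

#include <cstdint>
#include <vector>

// Returns the object offset for a run of ObjSize free bytes whose resulting
// object offset satisfies ObjAlign, or INT64_MIN when no such hole exists.
static int64_t findScavengedSlot(const std::vector<bool> &StackBytesFree,
                                 int64_t ObjSize, unsigned ObjAlign,
                                 bool StackGrowsDown) {
  for (uint64_t FreeStart = 0;
       FreeStart + ObjSize <= StackBytesFree.size(); ++FreeStart) {
    if (!StackBytesFree[FreeStart])
      continue;
    // The object's own offset, not the byte index, must be aligned.
    uint64_t ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart;
    if (ObjStart % ObjAlign != 0)
      continue;
    // Every byte of the candidate run must be free.
    bool AllBytesFree = true;
    for (int64_t Byte = 0; Byte < ObjSize; ++Byte)
      if (!StackBytesFree[FreeStart + Byte]) {
        AllBytesFree = false;
        break;
      }
    if (AllBytesFree)
      return StackGrowsDown ? -(int64_t)(FreeStart + ObjSize)
                            : (int64_t)FreeStart;
  }
  return INT64_MIN;
}

For example, with a 24-byte free map in which only bytes 8..23 are free, an 8-byte object with 8-byte alignment on a down-growing stack is placed at offset -16: byte 8 is the first free index whose object offset (16) is aligned and whose whole run is free.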
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void
+AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo *MFI, bool StackGrowsDown,
+ int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
+
+ for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+ E = UnassignedObjs.end(); I != E; ++I) {
+ int i = *I;
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ ProtectedObjs.insert(i);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ StackProtector *SP = &getAnalysis<StackProtector>();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // Skew to be applied to alignment.
+ unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
+  // If there are fixed-sized objects that are preallocated in the local area,
+  // non-fixed objects can't be allocated right at the start of the local area.
+  // Adjust 'Offset' to point to the end of the last fixed-sized preallocated
+  // object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at the lower address of
+      // the object -- which is given by the offset. For a down-growing stack
+      // the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+      // The maximum distance from the stack pointer is at the upper
+      // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = alignTo(Offset, Align, Skew);
+
+ DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
+    // Be careful about underflow in comparisons against MinCSFrameIndex.
+ for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = alignTo(Offset, Align, Skew);
+
+ DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ // FixedCSEnd is the stack offset to the end of the fixed and callee-save
+ // stack area.
+ int64_t FixedCSEnd = Offset;
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // incoming stack pointer if a frame pointer is required and the FP is
+  // closer to the incoming than to the final stack pointer.
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
+ bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+ TFI.isFPCloseToIncomingSP() &&
+ RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn));
+ if (RS && EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ // FIXME: Once this is working, then enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = alignTo(Offset, Align, Skew);
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Retrieve the Exception Handler registration node.
+ int EHRegNodeFrameIndex = INT_MAX;
+ if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo())
+ EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> ProtectedObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ StackObjSet LargeArrayObjs;
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign, Skew);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i ||
+ EHRegNodeFrameIndex == (int)i)
+ continue;
+
+ switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ case StackProtector::SSPLK_None:
+ continue;
+ case StackProtector::SSPLK_SmallArray:
+ SmallArrayObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_AddrOf:
+ AddrOfObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_LargeArray:
+ LargeArrayObjs.insert(i);
+ continue;
+ }
+ llvm_unreachable("Unexpected SSPLayoutKind.");
+ }
+
+ AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ }
+
+ SmallVector<int, 8> ObjectsToAllocate;
+
+ // Then prepare to assign frame offsets to stack objects that are not used to
+ // spill callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i ||
+ EHRegNodeFrameIndex == (int)i)
+ continue;
+ if (ProtectedObjs.count(i))
+ continue;
+
+ // Add the objects that we need to allocate to our working set.
+ ObjectsToAllocate.push_back(i);
+ }
+
+ // Allocate the EH registration node first if one is present.
+ if (EHRegNodeFrameIndex != INT_MAX)
+ AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
+ MaxAlign, Skew);
+
+  // Give the targets a chance to order the objects the way they like.
+ if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
+ Fn.getTarget().Options.StackSymbolOrdering)
+ TFI.orderFrameObjects(Fn, ObjectsToAllocate);
+
+ // Keep track of which bytes in the fixed and callee-save range are used so we
+  // can use the holes when allocating later stack objects. Only do this if the
+  // stack protector isn't being used, the target requests it, and we're
+  // optimizing.
+ BitVector StackBytesFree;
+ if (!ObjectsToAllocate.empty() &&
+ Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
+ MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
+ computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
+ FixedCSEnd, StackBytesFree);
+
+ // Now walk the objects and actually assign base offsets to them.
+ for (auto &Object : ObjectsToAllocate)
+ if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
+ StackBytesFree))
+ AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+    // If we have reserved argument space for call sites immediately on entry
+    // to the current function, count it as part of the overall stack
+    // size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+    // any calls or allocas, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ Offset = alignTo(Offset, StackAlign, Skew);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+
+ // Add prologue to the function...
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.emitPrologue(Fn, *SaveBlock);
+
+ // Add epilogue to restore the callee-save registers in each exiting block.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ TFI.emitEpilogue(Fn, *RestoreBlock);
+
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.inlineStackProbe(Fn, *SaveBlock);
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (Fn.shouldSplitStack()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ }
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void PEI::replaceFrameIndices(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(Fn)) return;
+
+ // Store SPAdj at exit of a basic block.
+ SmallVector<int, 8> SPState;
+ SPState.resize(Fn.getNumBlockIDs());
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+ // Iterate over the reachable blocks in DFS order.
+ for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+ DFI != DFE; ++DFI) {
+ int SPAdj = 0;
+ // Check the exit state of the DFS stack predecessor.
+ if (DFI.getPathLength() >= 2) {
+ MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+ assert(Reachable.count(StackPred) &&
+ "DFS stack predecessor is already visited.\n");
+ SPAdj = SPState[StackPred->getNumber()];
+ }
+ MachineBasicBlock *BB = *DFI;
+ replaceFrameIndices(BB, Fn, SPAdj);
+ SPState[BB->getNumber()] = SPAdj;
+ }
+
+ // Handle the unreachable blocks.
+ for (auto &BB : Fn) {
+ if (Reachable.count(&BB))
+ // Already handled in DFS traversal.
+ continue;
+ int SPAdj = 0;
+ replaceFrameIndices(&BB, Fn, SPAdj);
+ }
+}
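The SPAdj bookkeeping in the walk above is the subtle part: each block starts from the stack-pointer adjustment left over at the exit of its DFS-stack predecessor, so a call sequence that spans block boundaries is still accounted for. A toy, self-contained model of that propagation, separate from the patch and using an invented three-block CFG, looks like this:

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Toy straight-line CFG: BB0 -> BB1 -> BB2. BB0 sets up a 16-byte call
  // frame, BB2 tears it down; the numbers are invented for illustration.
  std::vector<std::vector<int>> Succs = {{1}, {2}, {}};
  std::vector<int> AdjustInBlock = {+16, 0, -16};
  std::vector<int> SPStateAtExit(Succs.size(), 0);
  std::vector<bool> Visited(Succs.size(), false);

  // Explicit DFS stack of (block, SPAdj inherited from the DFS predecessor).
  std::vector<std::pair<int, int>> Stack = {{0, 0}};
  while (!Stack.empty()) {
    int BB = Stack.back().first;
    int SPAdj = Stack.back().second;
    Stack.pop_back();
    if (Visited[BB])
      continue;                       // each reachable block is processed once
    Visited[BB] = true;
    SPAdj += AdjustInBlock[BB];       // net effect of the pseudos seen in BB
    SPStateAtExit[BB] = SPAdj;
    std::printf("BB%d exits with SPAdj %d\n", BB, SPAdj);
    for (int S : Succs[BB])
      Stack.push_back({S, SPAdj});    // successors start from this exit state
  }
  return 0;
}

Here BB0 and BB1 exit with SPAdj 16 and BB2 returns to 0, mirroring how the pass seeds each block from SPState of its DFS-stack predecessor.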
+
+void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj) {
+ assert(Fn.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB);
+
+ bool InsideCallSequence = false;
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ SPAdj += TII.getSPAdjust(*I);
+
+ I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+ continue;
+ }
+
+ MachineInstr &MI = *I;
+ bool DoIncr = true;
+ bool DidFinishLoop = true;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
+
+      // Frame indices in debug values are encoded in a target-independent
+      // way, using just the frame index and offset rather than any
+      // target-specific addressing mode.
+ if (MI.isDebugValue()) {
+ assert(i == 0 && "Frame indices can only appear as the first "
+ "operand of a DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI.getOperand(1);
+ Offset.setImm(
+ Offset.getImm() +
+ TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg));
+ MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // TODO: This code should be commoned with the code for
+ // PATCHPOINT. There's no good reason for the difference in
+ // implementation other than historical accident. The only
+ // remaining difference is the unconditional use of the stack
+ // pointer as the base register.
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT) {
+        assert((!MI.isDebugValue() || i == 0) &&
+               "Frame indices can only appear as the first operand of a "
+               "DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI.getOperand(i + 1);
+ int refOffset = TFI->getFrameIndexReferencePreferSP(
+ Fn, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
+ Offset.setImm(Offset.getImm() + refOffset);
+ MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? nullptr : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ DidFinishLoop = false;
+ break;
+ }
+
+ // If we are looking at a call sequence, we need to keep track of
+ // the SP adjustment made by each instruction in the sequence.
+ // This includes both the frame setup/destroy pseudos (handled above),
+ // as well as other instructions that have side effects w.r.t the SP.
+ // Note that this must come after eliminateFrameIndex, because
+ // if I itself referred to a frame index, we shouldn't count its own
+ // adjustment.
+ if (DidFinishLoop && InsideCallSequence)
+ SPAdj += TII.getSPAdjust(MI);
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && DidFinishLoop)
+ RS->forward(MI);
+ }
+}
+
+/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
+static void
+doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
+ // Run through the instructions and find any virtual registers.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ RS->enterBasicBlock(MBB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check MBB.end()
+ // directly.
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+      // We might end up here again with a NULL iterator if we scavenged a
+      // register for which spill code was inserted for a definition by what
+      // was originally the first instruction in MBB.
+ if (I == MachineBasicBlock::iterator(nullptr))
+ I = MBB.begin();
+
+ const MachineInstr &MI = *I;
+ MachineBasicBlock::iterator J = std::next(I);
+ MachineBasicBlock::iterator P =
+ I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
+ : std::prev(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MO.isDef() && "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert(ScratchReg && "Missing scratch register!");
+ MRI.replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setRegUsed(ScratchReg);
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != std::prev(J)) {
+ MBB.splice(J, &MBB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+ } else
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 000000000000..804a4c3dad66
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,139 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static const char *const PSVNames[] = {
+ "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+ "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
+
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
+
+PseudoSourceValue::~PseudoSourceValue() {}
+
+void PseudoSourceValue::printCustom(raw_ostream &O) const {
+ O << PSVNames[Kind];
+}
+
+bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ if (isStack())
+ return false;
+ if (isGOT() || isConstantPool() || isJumpTable())
+ return true;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ if (isStack() || isGOT() || isConstantPool() || isJumpTable())
+ return false;
+ llvm_unreachable("Unknown PseudoSourceValue!");
+}
+
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return !(isGOT() || isConstantPool() || isJumpTable());
+}
+
+bool FixedStackPseudoSourceValue::isConstant(
+ const MachineFrameInfo *MFI) const {
+ return MFI && MFI->isImmutableObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ return MFI->isAliasedObjectIndex(FI);
+}
+
+bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
+ if (!MFI)
+ return true;
+ // Spill slots will not alias any LLVM IR value.
+ return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
+
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
+ : PseudoSourceValue(Kind) {}
+
+bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+ return false;
+}
+
+GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
+ const GlobalValue *GV)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
+
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
+
+PseudoSourceValueManager::PseudoSourceValueManager()
+ : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT),
+ JumpTablePSV(PseudoSourceValue::JumpTable),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool) {}
+
+const PseudoSourceValue *PseudoSourceValueManager::getStack() {
+ return &StackPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; }
+
+const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() {
+ return &ConstantPoolPSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() {
+ return &JumpTablePSV;
+}
+
+const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) {
+ std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
+ if (!V)
+ V = llvm::make_unique<FixedStackPseudoSourceValue>(FI);
+ return V.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
+ std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
+ GlobalCallEntries[GV];
+ if (!E)
+ E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV);
+ return E.get();
+}
+
+const PseudoSourceValue *
+PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
+ std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
+ ExternalCallEntries[ES];
+ if (!E)
+ E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES);
+ return E.get();
+}
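The manager above leans on a common lazy-caching idiom: indexing a map of std::unique_ptr default-constructs an empty slot, which is then filled on first use so every later lookup returns the same object. Below is a self-contained sketch of that idiom, separate from the patch; FixedStackValue here is an invented stand-in, not an LLVM class, and std::make_unique (C++14) plays the role of llvm::make_unique.

#include <cassert>
#include <map>
#include <memory>

// Invented stand-in for a per-frame-index value cached by the manager below.
struct FixedStackValue {
  int FI;
  explicit FixedStackValue(int FI) : FI(FI) {}
};

class FixedStackCache {
  std::map<int, std::unique_ptr<FixedStackValue>> Values;

public:
  const FixedStackValue *get(int FI) {
    // operator[] creates an empty unique_ptr slot the first time FI is seen.
    std::unique_ptr<FixedStackValue> &V = Values[FI];
    if (!V)
      V = std::make_unique<FixedStackValue>(FI);
    return V.get();
  }
};

int main() {
  FixedStackCache Cache;
  const FixedStackValue *A = Cache.get(-1);
  assert(A == Cache.get(-1) && "same index must yield the same cached object");
  (void)A;
  return 0;
}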
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 000000000000..93eeb9cba457
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,161 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char RegAllocBase::TimerGroupName[] = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Pin the vtable to this file.
+void RegAllocBase::anchor() {}
+
+void RegAllocBase::init(VirtRegMap &vrm,
+ LiveIntervals &lis,
+ LiveRegMatrix &mat) {
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ Matrix = &mat;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion; otherwise,
+// push them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ enqueue(&LIS->getInterval(Reg));
+ }
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ aboutToRemoveInterval(*VirtReg);
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ Matrix->invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
+ << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
+ typedef SmallVector<unsigned, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ // Probably caused by an inline asm.
+ MachineInstr *MI = nullptr;
+ for (MachineRegisterInfo::reg_instr_iterator
+ I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
+ I != E; ) {
+ MachineInstr *TmpMI = &*(I++);
+ if (TmpMI->isInlineAsm()) {
+ MI = TmpMI;
+ break;
+ }
+ }
+ if (MI)
+ MI->emitError("inline assembly requires more registers than available");
+ else
+ report_fatal_error("ran out of registers during register allocation");
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ Matrix->assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = &LIS->getInterval(*I);
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ aboutToRemoveInterval(*SplitVirtReg);
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+void RegAllocBase::postOptimization() {
+ spiller().postOptimization();
+ for (auto DeadInst : DeadRemats) {
+ LIS->RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..296ffe8692c6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,122 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity and generated code performance are
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H
+#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class LiveRegMatrix;
+class Spiller;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They must also override enqueue/dequeue to provide an
+/// assignment order.
+class RegAllocBase {
+ virtual void anchor();
+protected:
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ LiveRegMatrix *Matrix;
+ RegisterClassInfo RegClassInfo;
+
+  /// An instruction that defines an original reg, and whose defs are already
+  /// all dead after remat, is saved in DeadRemats. Deleting such instructions
+  /// is postponed until all allocations are done, so their remat expressions
+  /// stay available for rematerializing all siblings of the original reg.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
+ RegAllocBase()
+ : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat);
+
+  // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ void allocatePhysRegs();
+
+  // Run the spiller's post-optimization and remove dead defs left behind by
+  // rematerialization.
+ virtual void postOptimization();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ /// enqueue - Add VirtReg to the priority queue of unassigned registers.
+ virtual void enqueue(LiveInterval *LI) = 0;
+
+ /// dequeue - Return the next unassigned register, or NULL.
+ virtual LiveInterval *dequeue() = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+  // Each call must guarantee forward progress by returning an available PhysReg
+  // or a new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<unsigned> &splitLVRs) = 0;
+
+ // Use this group name for NamedRegionTimer.
+ static const char TimerGroupName[];
+
+ /// Method called when the allocator is about to remove a LiveInterval.
+ virtual void aboutToRemoveInterval(LiveInterval &LI) {}
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveRegs();
+};
+
+} // end namespace llvm
+
+#endif
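Since this header only declares the extension points, it may help to see the shape of a subclass in miniature. RegAllocBasic.cpp, the next file in this commit, is the real example; the toy below is a self-contained analogue, separate from the patch, with an invented Interval type standing in for LiveInterval and a spill-weight-ordered queue like RABasic's.

#include <cstdio>
#include <queue>
#include <vector>

// Invented stand-in for LiveInterval: just a vreg number and a spill weight.
struct Interval {
  unsigned Reg;
  float Weight;
};

// Analogue of RegAllocBase: the driver pops work from a subclass-provided
// queue and asks the subclass to assign or split each interval.
class ToyAllocBase {
protected:
  virtual void enqueue(Interval *LI) = 0;
  virtual Interval *dequeue() = 0;  // nullptr when the queue is empty
  virtual unsigned selectOrSplit(Interval &LI,
                                 std::vector<Interval *> &NewIntervals) = 0;

  void allocate() {
    while (Interval *LI = dequeue()) {
      std::vector<Interval *> NewIntervals;
      unsigned PhysReg = selectOrSplit(*LI, NewIntervals);
      if (PhysReg)
        std::printf("vreg%u -> preg%u\n", LI->Reg, PhysReg);
      for (Interval *Split : NewIntervals)  // requeue anything produced by splitting
        enqueue(Split);
    }
  }

public:
  virtual ~ToyAllocBase() = default;
};

// Minimal subclass: highest spill weight first, and it always "assigns" preg 1.
class ToyBasic : public ToyAllocBase {
  struct ByWeight {
    bool operator()(const Interval *A, const Interval *B) const {
      return A->Weight < B->Weight;
    }
  };
  std::priority_queue<Interval *, std::vector<Interval *>, ByWeight> Queue;

  void enqueue(Interval *LI) override { Queue.push(LI); }
  Interval *dequeue() override {
    if (Queue.empty())
      return nullptr;
    Interval *LI = Queue.top();
    Queue.pop();
    return LI;
  }
  unsigned selectOrSplit(Interval &, std::vector<Interval *> &) override {
    return 1;
  }

public:
  void run(std::vector<Interval> &Regs) {
    for (Interval &LI : Regs)
      enqueue(&LI);
    allocate();
  }
};

int main() {
  std::vector<Interval> Regs = {{10, 2.0f}, {11, 5.0f}, {12, 1.0f}};
  ToyBasic().run(Regs);  // processed in weight order: vreg11, vreg10, vreg12
  return 0;
}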
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..11dfda67377f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,298 @@
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "AllocationOrder.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdlib>
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+namespace {
+ struct CompSpillWeight {
+ bool operator()(LiveInterval *A, LiveInterval *B) const {
+ return A->weight < B->weight;
+ }
+ };
+}
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and comparing
+/// the speed of the basic algorithm against other styles of allocators.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+
+ // state
+ std::unique_ptr<Spiller> SpillerInstance;
+ std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+ CompSpillWeight> Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
+public:
+ RABasic();
+
+ /// Return the pass name.
+ const char* getPassName() const override {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void releaseMemory() override;
+
+ Spiller &spiller() override { return *SpillerInstance; }
+
+ void enqueue(LiveInterval *LI) override {
+ Queue.push(LI);
+ }
+
+ LiveInterval *dequeue() override {
+ if (Queue.empty())
+ return nullptr;
+ LiveInterval *LI = Queue.top();
+ Queue.pop();
+ return LI;
+ }
+
+ unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<unsigned> &SplitVRegs) override;
+
+ /// Perform register allocation.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+  // Helper for spilling all live virtual registers currently unified under
+  // PhysReg that interfere with VirtReg. Return true if spilling was
+  // successful, and append any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<unsigned> &SplitVRegs);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset();
+}
+
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<unsigned> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ SmallVector<LiveInterval*, 8> Intfs;
+
+ // Collect interferences assigned to any alias of the physical register.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
+ return false;
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ return false;
+ Intfs.push_back(Intf);
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(!Intfs.empty() && "expected interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval &Spill = *Intfs[i];
+
+ // Skip duplicates.
+ if (!VRM->hasPhys(Spill.reg))
+ continue;
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ Matrix->unassign(Spill);
+
+ // Spill the extracted interval.
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ spiller().spill(LRE);
+ }
+ return true;
+}
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we find an
+// available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
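+//
+// The return value follows the selectOrSplit contract: a physical register
+// number asks the driver to assign VirtReg to that register, 0 means VirtReg
+// was spilled and nothing should be assigned this round, and ~0u means the
+// range is unspillable. A minimal sketch of a caller honoring that contract
+// (illustrative only; the actual driver is RegAllocBase's allocation loop):
+//
+//   SmallVector<unsigned, 4> NewVRegs;
+//   unsigned Reg = selectOrSplit(*VirtReg, NewVRegs);
+//   if (Reg != 0 && Reg != ~0u)
+//     Matrix->assign(*VirtReg, Reg);   // take the suggested physreg
+//   // ...otherwise requeue the intervals appended to NewVRegs and move on.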
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<unsigned> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
+ while (unsigned PhysReg = Order.next()) {
+ // Check for interference in PhysReg
+ switch (Matrix->checkInterference(VirtReg, PhysReg)) {
+ case LiveRegMatrix::IK_Free:
+ // PhysReg is available, allocate it.
+ return PhysReg;
+
+ case LiveRegMatrix::IK_VirtReg:
+ // Only virtual registers in the way, we may be able to spill them.
+ PhysRegSpillCands.push_back(PhysReg);
+ continue;
+
+ default:
+ // RegMask or RegUnit interference.
+ continue;
+ }
+ }
+
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
+ continue;
+
+ assert(!Matrix->checkInterference(VirtReg, *PhysRegI) &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ if (!VirtReg.isSpillable())
+ return ~0u;
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ spiller().spill(LRE);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << mf.getName() << '\n');
+
+ MF = &mf;
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+
+ calculateSpillWeightsAndHints(*LIS, *MF, VRM,
+ getAnalysis<MachineLoopInfo>(),
+ getAnalysis<MachineBlockFrequencyInfo>());
+
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+ postOptimization();
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ releaseMemory();
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
new file mode 100644
index 000000000000..55fb33edd720
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -0,0 +1,1122 @@
+//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This register allocator allocates registers one basic block at a time,
+// attempting to keep values in registers and reusing registers as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumStores, "Number of stores added");
+STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
+
+static RegisterRegAlloc
+ fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
+
+namespace {
+ class RAFast : public MachineFunctionPass {
+ public:
+ static char ID;
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
+ isBulkSpilling(false) {}
+
+ private:
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
+
+ // Basic block currently being allocated.
+ MachineBasicBlock *MBB;
+
+ // StackSlotForVirtReg - Maps virtual regs to the frame index where these
+ // values are spilled.
+ IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
+
+ // Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse; // Last instr to use reg.
+ unsigned VirtReg; // Virtual register number.
+ unsigned PhysReg; // Currently held here.
+ unsigned short LastOpNum; // OpNum on LastUse.
+ bool Dirty; // Register needs spill.
+
+ explicit LiveReg(unsigned v)
+ : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){}
+
+ unsigned getSparseSetIndex() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
+ };
+
+ typedef SparseSet<LiveReg> LiveRegMap;
+
+ // LiveVirtRegs - This map contains entries for each virtual register
+ // that is currently available in a physical register.
+ LiveRegMap LiveVirtRegs;
+
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
+
+ // RegState - Track the state of a physical register.
+ enum RegState {
+ // A disabled register is not available for allocation, but an alias may
+ // be in use. A register can only be moved out of the disabled state if
+ // all aliases are disabled.
+ regDisabled,
+
+ // A free register is not currently in use and can be allocated
+ // immediately without checking aliases.
+ regFree,
+
+ // A reserved register has been assigned explicitly (e.g., setting up a
+ // call parameter), and it remains reserved until it is used.
+ regReserved
+
+    // A register state may also be a virtual register number, indicating that
+ // the physical register is currently allocated to a virtual register. In
+ // that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ // PhysRegState - One of the RegState enums, or a virtreg.
+ std::vector<unsigned> PhysRegState;
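+
+    // For illustration: virtual register numbers never collide with the small
+    // RegState values above (they have the high bit set), so one unsigned per
+    // physreg encodes the whole state. With hypothetical registers:
+    //   PhysRegState[AX] == regFree     - AX is free for allocation.
+    //   PhysRegState[BX] == regReserved - BX holds a value we must not clobber.
+    //   PhysRegState[CX] == <vreg42>    - CX holds vreg42, and the LiveVirtRegs
+    //                                     entry for vreg42 records PhysReg == CX.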
+
+ // Set of register units.
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // Set of register units that are used in the current instruction, and so
+ // cannot be allocated.
+ UsedInInstrSet UsedInInstr;
+
+ // Mark a physreg as used in this instruction.
+ void markRegUsedInInstr(unsigned PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.insert(*Units);
+ }
+
+ // Check if a physreg or any of its aliases are used in this instruction.
+ bool isRegUsedInInstr(unsigned PhysReg) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ if (UsedInInstr.count(*Units))
+ return true;
+ return false;
+ }
+
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
+
+ // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+ // completely after spilling all live registers. LiveRegMap entries should
+ // not be erased.
+ bool isBulkSpilling;
+
+ enum : unsigned {
+ spillClean = 1,
+ spillDirty = 100,
+ spillImpossible = ~0u
+ };
+ public:
+ const char *getPassName() const override {
+ return "Fast Register Allocator";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ private:
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+ void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
+ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
+ bool isLastUseOfLocalReg(MachineOperand&);
+
+ void addKillFlag(const LiveReg&);
+ void killVirtReg(LiveRegMap::iterator);
+ void killVirtReg(unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+ void usePhysReg(MachineOperand&);
+ void definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState);
+ unsigned calcSpillCost(unsigned PhysReg) const;
+ void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+ LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator,
+ unsigned Hint);
+ LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ void spillAll(MachineBasicBlock::iterator MI);
+ bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ };
+ char RAFast::ID = 0;
+}
+
+/// getStackSpaceFor - This allocates space for the specified virtual register
+/// to be held on the stack.
+int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
+ // Find the location Reg would belong...
+ int SS = StackSlotForVirtReg[VirtReg];
+ if (SS != -1)
+ return SS; // Already has space allocated?
+
+ // Allocate a new stack object for this spill location...
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+
+ // Assign the slot.
+ StackSlotForVirtReg[VirtReg] = FrameIdx;
+ return FrameIdx;
+}
+
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
+///
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
+
+ // Check that the use/def chain has exactly one operand - MO.
+ MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
+ if (&*I != &MO)
+ return false;
+ return ++I == MRI->reg_nodbg_end();
+}
+
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ else
+ LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
+ }
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+ addKillFlag(*LRI);
+ assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+ "Broken RegState mapping");
+ PhysRegState[LRI->PhysReg] = regFree;
+ // Erase from LiveVirtRegs unless we're spilling in bulk.
+ if (!isBulkSpilling)
+ LiveVirtRegs.erase(LRI);
+}
+
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end())
+ killVirtReg(LRI);
+}
+
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+ spillVirtReg(MI, LRI);
+}
+
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
+ LiveReg &LR = *LRI;
+ assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
+ LR.Dirty = false;
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ int FI = getStackSpaceFor(LRI->VirtReg, RC);
+ DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ ++NumStores; // Update statistics
+
+    // If this register is used by a DBG_VALUE, insert a new DBG_VALUE that
+    // identifies the spilled location as the place to find the corresponding
+    // variable's value.
+ SmallVectorImpl<MachineInstr *> &LRIDbgValues =
+ LiveDbgValueMap[LRI->VirtReg];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
+ const MDNode *Var = DBG->getDebugVariable();
+ const MDNode *Expr = DBG->getDebugExpression();
+ bool IsIndirect = DBG->isIndirectDebugValue();
+ uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0;
+ DebugLoc DL = DBG->getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ MachineInstr *NewDV =
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ assert(NewDV->getParent() == MBB && "dangling parent pointer");
+ (void)NewDV;
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ }
+    // Now that this register has been spilled, there should not be any
+    // DBG_VALUE pointing to it; they all point to the spilled value now.
+ LRIDbgValues.clear();
+ if (SpillKill)
+ LR.LastUse = nullptr; // Don't kill register again
+ }
+ killVirtReg(LRI);
+}
+
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineBasicBlock::iterator MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
+}
+
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+
+ // Ignore undef uses.
+ if (MO.isUndef())
+ return;
+
+ markRegUsedInInstr(PhysReg);
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ MO.setIsKill();
+ return;
+ default:
+ // The physreg was allocated to a virtual register. That means the value we
+ // wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
+
+ // Maybe a superregister is reserved?
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ // Either PhysReg is a subregister of Alias and we mark the
+ // whole register as free, or PhysReg is the superregister of
+ // Alias and we mark all the aliases as disabled before freeing
+ // PhysReg.
+ // In the latter case, since PhysReg was disabled, this means that
+ // its value is defined only by physical sub-registers. This check
+ // is performed by the assert of the default case in this loop.
+      // Note: The value of the superregister may only be partially
+      // defined; that is why regDisabled is a valid state for aliases.
+ assert((TRI->isSuperRegister(PhysReg, Alias) ||
+ TRI->isSuperRegister(Alias, PhysReg)) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Fall through.
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
+ }
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ MO.setIsKill();
+}
+
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
+ RegState NewState) {
+ markRegUsedInInstr(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
+
+ // This is a disabled register, disable all aliases.
+ PhysRegState[PhysReg] = NewState;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
+
+
+// calcSpillCost - Return the cost of spilling and clearing out PhysReg and its
+// aliases so it is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
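+// For illustration: if PhysReg is disabled because two of its sub-registers
+// currently hold virtual registers, one clean and one dirty, the alias loop
+// below reports spillClean + spillDirty = 1 + 100 = 101, steering
+// allocVirtReg() toward registers whose contents are cheaper to evict; any
+// reserved alias makes the register unusable (spillImpossible).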
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (isRegUsedInInstr(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
+ return spillImpossible;
+ }
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ return I->Dirty ? spillDirty : spillClean;
+ }
+ }
+
+ // This is a disabled register, add up cost of aliases.
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
+ unsigned Cost = 0;
+ for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
+ unsigned Alias = *AI;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ Cost += I->Dirty ? spillDirty : spillClean;
+ break;
+ }
+ }
+ }
+ return Cost;
+}
+
+
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
+///
+void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
+ PhysRegState[PhysReg] = LR.VirtReg;
+ assert(!LR.PhysReg && "Already assigned a physreg");
+ LR.PhysReg = PhysReg;
+}
+
+RAFast::LiveRegMap::iterator
+RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI,
+ LiveRegMap::iterator LRI,
+ unsigned Hint) {
+ const unsigned VirtReg = LRI->VirtReg;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !MRI->isAllocatable(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, Hint);
+ }
+ }
+
+ ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
+
+ // First try to find a completely free register.
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
+ unsigned PhysReg = *I;
+ if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+ }
+ }
+
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << TRI->getRegClassName(RC) << "\n");
+
+ unsigned BestReg = 0, BestCost = spillImpossible;
+ for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
+ unsigned Cost = calcSpillCost(*I);
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
+ DEBUG(dbgs() << "\tCost: " << Cost << "\n");
+ DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
+ // Cost is 0 when all aliases are already disabled.
+ if (Cost == 0) {
+ assignVirtToPhysReg(*LRI, *I);
+ return LRI;
+ }
+ if (Cost < BestCost)
+ BestReg = *I, BestCost = Cost;
+ }
+
+ if (BestReg) {
+ definePhysReg(MI, BestReg, regFree);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, BestReg);
+ }
+
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ if (MI.isInlineAsm())
+ MI.emitError("inline assembly requires more registers than available");
+ else
+ MI.emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ return assignVirtToPhysReg(VirtReg, *AO.begin());
+}
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ if (New) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
+ // It's a copy, use the destination register as a hint.
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ }
+ LRI = allocVirtReg(MI, LRI, Hint);
+ } else if (LRI->LastUse) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
+ }
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = &MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI;
+}
+
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI,
+ unsigned OpNum,
+ unsigned VirtReg,
+ unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (New) {
+ LRI = allocVirtReg(MI, LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LRI->Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
+ }
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = &MI;
+ LRI->LastOpNum = OpNum;
+ markRegUsedInInstr(LRI->PhysReg);
+ return LRI;
+}
+
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
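+// For illustration, with hypothetical target registers: if the operand is
+// %vreg7:sub_16 and vreg7 was assigned EAX, the operand is rewritten to AX
+// via TRI->getSubReg(EAX, sub_16) and its subregister index is cleared; a
+// kill flag on such an operand is widened into a kill of the full
+// super-register EAX.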
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ bool Dead = MO.isDead();
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || Dead;
+ }
+
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
+ }
+
+ // A <def,read-undef> of a sub-register requires an implicit def of the full
+ // register.
+ if (MO.isDef() && MO.isUndef())
+ MI->addRegisterDefined(PhysReg, TRI);
+
+ return Dead;
+}
+
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
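+//
+// A "through" register is a virtual register that must stay in the same
+// physical register across the instruction: tied use/def pairs, early-clobber
+// operands, and partial redefinitions that also read the register. For
+// illustration with a hypothetical two-address instruction
+//   %vreg5<def,tied1> = ADD %vreg5<tied0>, %vreg6
+// vreg5 is a through register, so it must not end up in a physreg that the
+// instruction also defines explicitly.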
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg).second)
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ markRegUsedInInstr(Reg);
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (ThroughRegs.count(PhysRegState[*AI]))
+ definePhysReg(*MI, *AI, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->PhysReg);
+ }
+ }
+
+ DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.clear();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
+ markRegUsedInInstr(Reg);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ markRegUsedInInstr(PartialDefs[i]);
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
+ for (const auto &LI : MBB->liveins())
+ if (MRI->isAllocatable(LI.PhysReg))
+ definePhysReg(*MII, LI.PhysReg, regReserved);
+
+ SmallVector<unsigned, 8> VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
+
+  // Sequentially allocate each instruction in the MBB.
+ while (MII != MBB->end()) {
+ MachineInstr *MI = &*MII++;
+ const MCInstrDesc &MCID = MI->getDesc();
+ DEBUG({
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default: {
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
+ dbgs() << "*";
+ assert(I->PhysReg == Reg && "Bad inverse map");
+ break;
+ }
+ }
+ }
+ dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ }
+ });
+
+ // Debug values are not allowed to change codegen in any way.
+ if (MI->isDebugValue()) {
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->PhysReg);
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1) {
+ // We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
+ MO.setReg(0);
+ }
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ bool IsIndirect = MI->isIndirectDebugValue();
+ uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ const MDNode *Var = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(
+ cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL,
+ TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(SS)
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ DEBUG(dbgs() << "Modifying debug info due to spill:"
+ << "\t" << *NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ }
+ }
+ LiveDbgValueMap[Reg].push_back(MI);
+ }
+ }
+ // Next instruction.
+ continue;
+ }
+
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ }
+
+ // Track registers used by instruction.
+ UsedInInstr.clear();
+
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+ // Find the end of the virtreg operands
+ unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask()) {
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+ continue;
+ }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
+ continue;
+ }
+ if (!MRI->isAllocatable(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(*MI, Reg,
+ (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
+ }
+
+ // Second scan.
+ // Allocate virtreg uses.
+ for (unsigned i = 0; i != VirtOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, CopyDst);
+ unsigned PhysReg = LRI->PhysReg;
+ CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, i, PhysReg))
+ killVirtReg(LRI);
+ }
+ }
+
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
+ UsedInInstr.clear();
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
+ markRegUsedInInstr(Reg);
+ }
+ }
+
+ unsigned DefOpEnd = MI->getNumOperands();
+ if (MI->isCall()) {
+ // Spill all virtregs before a call. This serves one purpose: If an
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots.
+      // Note: although it is appealing to just consider all definitions
+      // as call-clobbered, this is not correct because some of those
+      // definitions may be used later on, and we do not want to reuse
+      // those registers for virtual registers in between.
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&MCID);
+ }
+
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!MRI->isAllocatable(Reg)) continue;
+ definePhysReg(*MI, Reg, MO.isDead() ? regFree : regReserved);
+ continue;
+ }
+ LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+ CopyDst = 0; // cancel coalescing;
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
+ }
+
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+    // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
+ }
+
+ // Spill all physical registers holding virtual registers now.
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
+}
+
+/// runOnMachineFunction - Register allocate the whole function
+///
+bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: " << Fn.getName() << '\n');
+ MF = &Fn;
+ MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ MRI->freezeReservedRegs(Fn);
+ RegClassInfo.runOnMachineFunction(Fn);
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegUnits());
+
+ assert(!MRI->isSSA() && "regalloc requires leaving SSA");
+
+ // initialize the virtual->physical register map to have a 'null'
+ // mapping for all virtual registers
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+
+ // Loop over all of the basic blocks, eliminating virtual register references
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+
+ SkippedInstrs.clear();
+ StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
+ return true;
+}
+
+FunctionPass *llvm::createFastRegisterAllocator() {
+ return new RAFast();
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..c4d4b1eadf3e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,2619 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "InterferenceCache.h"
+#include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
+#include "SpillPlacement.h"
+#include "Spiller.h"
+#include "SplitKit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
+ "split-spill-mode", cl::Hidden,
+ cl::desc("Spill mode for splitting live ranges"),
+ cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
+ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
+ clEnumValEnd),
+ cl::init(SplitEditor::SM_Speed));
+
+static cl::opt<unsigned>
+LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
+ cl::desc("Last chance recoloring max depth"),
+ cl::init(5));
+
+static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
+ "lcr-max-interf", cl::Hidden,
+ cl::desc("Last chance recoloring maximum number of considered"
+ " interference at a time"),
+ cl::init(8));
+
+static cl::opt<bool>
+ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
+ cl::desc("Exhaustive Search for registers bypassing the depth "
+ "and interference cutoffs of last chance recoloring"));
+
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
+static cl::opt<bool> EnableDeferredSpilling(
+ "enable-deferred-spilling", cl::Hidden,
+ cl::desc("Instead of spilling a variable right away, defer the actual "
+ "code insertion to the end of the allocation. That way the "
+ "allocator might still find a suitable coloring for this "
+ "variable because of other evicted variables."),
+ cl::init(false));
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned>
+CSRFirstTimeCost("regalloc-csr-first-time-cost",
+ cl::desc("Cost for first time use of callee-saved register."),
+ cl::init(0), cl::Hidden);
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
+ // Convenient shortcuts.
+ typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
+ typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
+ typedef SmallSet<unsigned, 16> SmallVirtRegSet;
+
+ // context
+ MachineFunction *MF;
+
+ // Shortcuts to some useful interface.
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ RegisterClassInfo RCI;
+
+ // analyses
+ SlotIndexes *Indexes;
+ MachineBlockFrequencyInfo *MBFI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+ LiveDebugVariables *DebugVars;
+ AliasAnalysis *AA;
+
+ // state
+ std::unique_ptr<Spiller> SpillerInstance;
+ PQueue Queue;
+ unsigned NextCascade;
+
+ // Live ranges pass through a number of stages as we try to allocate them.
+ // Some of the stages may also create new live ranges:
+ //
+ // - Region splitting.
+ // - Per-block splitting.
+ // - Local splitting.
+ // - Spilling.
+ //
+ // Ranges produced by one of the stages skip the previous stages when they are
+ // dequeued. This improves performance because we can skip interference checks
+ // that are unlikely to give any results. It also guarantees that the live
+ // range splitting algorithm terminates, something that is otherwise hard to
+ // ensure.
+ enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+
+ /// Live range is in memory. Because of other evictions, it might get moved
+ /// in a register in the end.
+ RS_Memory,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+ };
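+
+  // For illustration, a worst-case progression for a single original range is
+  // RS_Assign -> RS_Split -> RS_Split2 -> RS_Spill -> RS_Done, with split
+  // products entering the queue as RS_New and being promoted by setStage();
+  // a dequeued range only ever moves forward through these stages, which is
+  // how splitting is guaranteed to terminate (see the comment above the enum).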
+
+  // Enum CutOffStage to keep track of whether the register allocation failed
+  // because of the cutoffs encountered in last chance recoloring.
+  // Note: This is used as a bitmask. Any new value should be the next power of 2.
+ enum CutOffStage {
+ // No cutoffs encountered
+ CO_None = 0,
+
+ // lcr-max-depth cutoff encountered
+ CO_Depth = 1,
+
+ // lcr-max-interf cutoff encountered
+ CO_Interf = 2
+ };
+
+ uint8_t CutOffInfo;
+
+#ifndef NDEBUG
+ static const char *const StageName[];
+#endif
+
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+
+ LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ }
+
+ template<typename Iterator>
+ void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ for (;Begin != End; ++Begin) {
+ unsigned Reg = *Begin;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
+ }
+ }
+
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(): BrokenHints(0), MaxWeight(0) {}
+
+ bool isMax() const { return BrokenHints == ~0u; }
+
+ void setMax() { BrokenHints = ~0u; }
+
+ void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
+ bool operator<(const EvictionCost &O) const {
+ return std::tie(BrokenHints, MaxWeight) <
+ std::tie(O.BrokenHints, O.MaxWeight);
+ }
+ };
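+
+  // For illustration: the comparison is lexicographic on (BrokenHints,
+  // MaxWeight), so an eviction breaking one hint while evicting weight 2.0 is
+  // cheaper than one breaking two hints regardless of weight; when the number
+  // of broken hints ties, the smaller maximum evicted spill weight wins.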
+
+ // splitting state.
+ std::unique_ptr<SplitAnalysis> SA;
+ std::unique_ptr<SplitEditor> SE;
+
+ /// Cached per-block interference maps
+ InterferenceCache IntfCache;
+
+ /// All basic blocks where the current register has uses.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+ /// Global live range splitting candidate info.
+ struct GlobalSplitCandidate {
+ // Register intended for assignment, or 0.
+ unsigned PhysReg;
+
+ // SplitKit interval index for this candidate.
+ unsigned IntvIdx;
+
+ // Interference for PhysReg.
+ InterferenceCache::Cursor Intf;
+
+ // Bundles where this candidate should be live.
+ BitVector LiveBundles;
+ SmallVector<unsigned, 8> ActiveBlocks;
+
+ void reset(InterferenceCache &Cache, unsigned Reg) {
+ PhysReg = Reg;
+ IntvIdx = 0;
+ Intf.setPhysReg(Cache, Reg);
+ LiveBundles.clear();
+ ActiveBlocks.clear();
+ }
+
+ // Set B[i] = C for every live bundle where B[i] was NoCand.
+ unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
+ unsigned Count = 0;
+ for (int i = LiveBundles.find_first(); i >= 0;
+ i = LiveBundles.find_next(i))
+ if (B[i] == NoCand) {
+ B[i] = C;
+ Count++;
+ }
+ return Count;
+ }
+ };
+
+ /// Candidate info for each PhysReg in AllocationOrder.
+ /// This vector never shrinks, but grows to the size of the largest register
+ /// class.
+ SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+ enum : unsigned { NoCand = ~0u };
+
+ /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
+ /// NoCand which indicates the stack interval.
+ SmallVector<unsigned, 32> BundleCand;
+
+ /// Callee-save register cost, calculated once per machine function.
+ BlockFrequency CSRCost;
+
+ /// Whether to run the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ bool EnableLocalReassign;
+
+ /// Set of broken hints that may be reconciled later because of eviction.
+ SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ const char* getPassName() const override {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void releaseMemory() override;
+ Spiller &spiller() override { return *SpillerInstance; }
+ void enqueue(LiveInterval *LI) override;
+ LiveInterval *dequeue() override;
+ unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
+ void aboutToRemoveInterval(LiveInterval &) override;
+
+ /// Perform register allocation.
+ bool runOnMachineFunction(MachineFunction &mf) override;
+
+ static char ID;
+
+private:
+ unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
+ SmallVirtRegSet &, unsigned = 0);
+
+ bool LRE_CanEraseVirtReg(unsigned) override;
+ void LRE_WillShrinkVirtReg(unsigned) override;
+ void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+ void enqueue(PQueue &CurQueue, LiveInterval *LI);
+ LiveInterval *dequeue(PQueue &CurQueue);
+
+ BlockFrequency calcSpillCost();
+ bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
+ void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
+ void growRegion(GlobalSplitCandidate &Cand);
+ BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
+ bool calcCompactRegion(GlobalSplitCandidate&);
+ void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<unsigned>&);
+ bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters);
+
+ unsigned tryAssign(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&);
+ unsigned tryEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&, unsigned = ~0u);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&);
+ /// Calculate cost of region splitting.
+ unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands, bool IgnoreCSR);
+ /// Perform region splitting.
+ unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact,
+ SmallVectorImpl<unsigned> &NewVRegs);
+ /// Check other options before using a callee-saved register for the first
+ /// time.
+ unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
+ unsigned PhysReg, unsigned &CostPerUseLimit,
+ SmallVectorImpl<unsigned> &NewVRegs);
+ void initializeCSRCost();
+ unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&);
+ unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<unsigned>&);
+ unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<unsigned> &,
+ SmallVirtRegSet &, unsigned);
+ bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
+ SmallVirtRegSet &, unsigned);
+ void tryHintRecoloring(LiveInterval &);
+ void tryHintsRecoloring();
+
+ /// Model the information carried by one end of a copy.
+ struct HintInfo {
+ /// The frequency of the copy.
+ BlockFrequency Freq;
+ /// The virtual register or physical register.
+ unsigned Reg;
+ /// Its currently assigned register.
+ /// In case of a physical register Reg == PhysReg.
+ unsigned PhysReg;
+ HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
+ : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
+ };
+ typedef SmallVector<HintInfo, 4> HintsInfo;
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
+ void collectHintInfo(unsigned, HintsInfo &);
+
+ bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+#ifndef NDEBUG
+const char *const RAGreedy::StageName[] = {
+ "RS_New",
+ "RS_Assign",
+ "RS_Split",
+ "RS_Split2",
+ "RS_Spill",
+ "RS_Memory",
+ "RS_Done"
+};
+#endif
+
+// Hysteresis to use when comparing floats.
+// This helps stabilize decisions based on float comparisons.
+const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
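+// For example, tryLocalSplit only accepts a new split candidate when its
+// estimated weight beats the interference it must evict by roughly 2%
+// (EstWeight * Hysteresis >= MaxGap).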
+
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveRegMatrix>();
+ AU.addPreserved<LiveRegMatrix>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LiveRangeEdit delegate methods
+//===----------------------------------------------------------------------===//
+
+bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (VRM->hasPhys(VirtReg)) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
+
+void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+ // Cloning a register we haven't even heard about yet? Just ignore it.
+ if (!ExtraRegInfo.inBounds(Old))
+ return;
+
+ // LRE may clone a virtual register because dead code elimination causes it to
+ // be split into connected components. The new components are much smaller
+ // than the original, so they should get a new chance at being assigned: reset
+ // the stage to RS_Assign and let the clone inherit the rest from the parent.
+ ExtraRegInfo[Old].Stage = RS_Assign;
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset();
+ ExtraRegInfo.clear();
+ GlobalCand.clear();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
+
+void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
+ // Prioritize live ranges by size, assigning larger ranges first.
+ // The queue holds (priority, ~reg) pairs.
+ const unsigned Size = LI->getSize();
+ const unsigned Reg = LI->reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Can only enqueue virtual registers");
+ unsigned Prio;
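+ // The priority is roughly laid out as follows (higher values are dequeued
+ // first):
+ //   bit 31: set for everything except RS_Split and RS_Memory ranges.
+ //   bit 30: set when the range has a known physreg preference (hint).
+ //   bit 29: set for global ranges so they are allocated before local ones.
+ //   bits 24+: the register class AllocationPriority, for local ranges.
+ //   low bits: instruction distance for local ranges, size otherwise.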
+
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_Assign;
+
+ if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ // Unsplit ranges that couldn't be allocated immediately are deferred until
+ // everything else has been allocated.
+ Prio = Size;
+ } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
+ // Memory operands should be considered last.
+ // Change the priority such that memory operands are assigned in
+ // the reverse order that they came in.
+ // TODO: Make this a member variable and probably do something about hints.
+ static unsigned MemOp = 0;
+ Prio = MemOp++;
+ } else {
+ // Giant live ranges fall back to the global assignment heuristic, which
+ // prevents excessive spilling in pathological cases.
+ bool ReverseLocal = TRI->reverseLocalAssignment();
+ const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+ bool ForceGlobal = !ReverseLocal &&
+ (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
+
+ if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
+ LIS->intervalIsInOneMBB(*LI)) {
+ // Allocate original local ranges in linear instruction order. Since they
+ // are singly defined, this produces optimal coloring in the absence of
+ // global interference and other constraints.
+ if (!ReverseLocal)
+ Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+ else {
+ // Allocating bottom up may allow many short live ranges to be assigned
+ // first to one of the cheap registers. This could be much faster for very
+ // large blocks on targets with many physical registers.
+ Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
+ }
+ Prio |= RC.AllocationPriority << 24;
+ } else {
+ // Allocate global and split ranges in long->short order. Long ranges that
+ // don't fit should be spilled (or split) ASAP so they don't create
+ // interference. Mark a bit to prioritize global above local ranges.
+ Prio = (1u << 29) + Size;
+ }
+ // Mark a higher bit to prioritize global and local above RS_Split.
+ Prio |= (1u << 31);
+
+ // Boost ranges that have a physical register hint.
+ if (VRM->hasKnownPreference(Reg))
+ Prio |= (1u << 30);
+ }
+ // The virtual register number is a tie breaker for same-sized ranges.
+ // Give lower vreg numbers higher priority to assign them first.
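+ // Complementing the register number makes lower vreg numbers win the tie
+ // break; dequeue() undoes the complement.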
+ CurQueue.push(std::make_pair(Prio, ~Reg));
+}
+
+LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
+
+LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
+ if (CurQueue.empty())
+ return nullptr;
+ LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
+ CurQueue.pop();
+ return LI;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Direct Assignment
+//===----------------------------------------------------------------------===//
+
+/// tryAssign - Try to assign VirtReg to an available register.
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ Order.rewind();
+ unsigned PhysReg;
+ while ((PhysReg = Order.next()))
+ if (!Matrix->checkInterference(VirtReg, PhysReg))
+ break;
+ if (!PhysReg || Order.isHint())
+ return PhysReg;
+
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+
+ // Most registers have 0 additional cost.
+ if (!Cost)
+ return PhysReg;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
+ << '\n');
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ return CheapReg ? CheapReg : PhysReg;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Interference eviction
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
+ unsigned PhysReg;
+ while ((PhysReg = Order.next())) {
+ if (PhysReg == PrevReg)
+ continue;
+
+ MCRegUnitIterator Units(PhysReg, TRI);
+ for (; Units.isValid(); ++Units) {
+ // Instantiate a "subquery", not to be confused with the Queries array.
+ LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
+ if (subQ.checkInterference())
+ break;
+ }
+ // If no units have interference, break out with the current PhysReg.
+ if (!Units.isValid())
+ break;
+ }
+ if (PhysReg)
+ DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
+ << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
+ << '\n');
+ return PhysReg;
+}
+
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
+///
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) < RS_Spill;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ if (A.weight > B.weight) {
+ DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
+ return true;
+ }
+ return false;
+}
+
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ bool IsHint, EvictionCost &MaxCost) {
+ // It is only possible to evict virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
+ return false;
+
+ bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
+
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ // If there are 10 or more interferences, chances are one is heavier.
+ if (Q.collectInterferingVRegs(10) >= 10)
+ return false;
+
+ // Check if any interfering live range is heavier than MaxWeight.
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ "Only expecting virtual register interference from query");
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Done)
+ return false;
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ //
+ // Also allow urgent evictions of unspillable ranges from a strictly
+ // larger allocation order.
+ bool Urgent = !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
+ return false;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
+ }
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ if (Urgent)
+ continue;
+ // Apply the eviction policy for non-urgent evictions.
+ if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
+ // If !MaxCost.isMax(), then we're just looking for a cheap register.
+ // Evicting another local live range in this case could lead to suboptimal
+ // coloring.
+ if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
+ return false;
+ }
+ }
+ }
+ MaxCost = Cost;
+ return true;
+}
+
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+
+ // Collect all interfering virtregs first.
+ SmallVector<LiveInterval*, 8> Intfs;
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+ ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ Intfs.append(IVR.begin(), IVR.end());
+ }
+
+ // Evict them second. This will invalidate the queries.
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
+ LiveInterval *Intf = Intfs[i];
+ // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
+ if (!VRM->hasPhys(Intf->reg))
+ continue;
+ Matrix->unassign(*Intf);
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf->reg);
+ }
+}
+
+/// Returns true if the given \p PhysReg is a callee-saved register and has not
+/// been used for allocation yet.
+bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
+ unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
+ if (CSR == 0)
+ return false;
+
+ return !Matrix->isPhysRegUsed(PhysReg);
+}
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ unsigned CostPerUseLimit) {
+ NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost;
+ BestCost.setMax();
+ unsigned BestPhys = 0;
+ unsigned OrderLimit = Order.getOrder().size();
+
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+
+ // Check if any registers in RC are below CostPerUseLimit.
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ unsigned MinCost = RegClassInfo.getMinCost(RC);
+ if (MinCost >= CostPerUseLimit) {
+ DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
+ << ", no cheaper registers to be found.\n");
+ return 0;
+ }
+
+ // It is normal for register classes to have a long tail of registers with
+ // the same cost. We don't need to look at them if they're too expensive.
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
+ OrderLimit = RegClassInfo.getLastCostChange(RC);
+ DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
+ }
+ }
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next(OrderLimit)) {
+ if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
+ continue;
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
+ << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ continue;
+
+ // Best so far.
+ BestPhys = PhysReg;
+
+ // Stop if the hint can be used.
+ if (Order.isHint())
+ break;
+ }
+
+ if (!BestPhys)
+ return 0;
+
+ evictInterference(VirtReg, BestPhys, NewVRegs);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// addSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Physreg and its aliases. Add the constraints to
+/// SpillPlacement and return the static cost of this split in Cost, assuming
+/// that all preferences in SplitConstraints are met.
+/// Return false if there are no bundles with positive bias.
+bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
+ BlockFrequency &Cost) {
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+
+ // Reset interference dependent info.
+ SplitConstraints.resize(UseBlocks.size());
+ BlockFrequency StaticCost = 0;
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+
+ BC.Number = BI.MBB->getNumber();
+ Intf.moveToBlock(BC.Number);
+ BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.ChangesValue = BI.FirstDef.isValid();
+
+ if (!Intf.hasInterference())
+ continue;
+
+ // Number of spill code instructions to insert.
+ unsigned Ins = 0;
+
+ // Interference for the live-in value.
+ if (BI.LiveIn) {
+ if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
+ BC.Entry = SpillPlacement::MustSpill;
+ ++Ins;
+ } else if (Intf.first() < BI.FirstInstr) {
+ BC.Entry = SpillPlacement::PrefSpill;
+ ++Ins;
+ } else if (Intf.first() < BI.LastInstr) {
+ ++Ins;
+ }
+ }
+
+ // Interference for the live-out value.
+ if (BI.LiveOut) {
+ if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
+ BC.Exit = SpillPlacement::MustSpill;
+ ++Ins;
+ } else if (Intf.last() > BI.LastInstr) {
+ BC.Exit = SpillPlacement::PrefSpill;
+ ++Ins;
+ } else if (Intf.last() > BI.FirstInstr) {
+ ++Ins;
+ }
+ }
+
+ // Accumulate the total frequency of inserted spill code.
+ while (Ins--)
+ StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
+ }
+ Cost = StaticCost;
+
+ // Add constraints for use-blocks. Note that these are the only constraints
+ // that may add a positive bias; it is downhill from here.
+ SpillPlacer->addConstraints(SplitConstraints);
+ return SpillPlacer->scanActiveBundles();
+}
+
+
+/// addThroughConstraints - Add constraints and links to SpillPlacer from the
+/// live-through blocks in Blocks.
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
+ ArrayRef<unsigned> Blocks) {
+ const unsigned GroupSize = 8;
+ SpillPlacement::BlockConstraint BCS[GroupSize];
+ unsigned TBS[GroupSize];
+ unsigned B = 0, T = 0;
+
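+ // Constraints and links are accumulated in the fixed-size arrays above and
+ // flushed to SpillPlacer whenever a batch of GroupSize entries fills up.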
+ for (unsigned i = 0; i != Blocks.size(); ++i) {
+ unsigned Number = Blocks[i];
+ Intf.moveToBlock(Number);
+
+ if (!Intf.hasInterference()) {
+ assert(T < GroupSize && "Array overflow");
+ TBS[T] = Number;
+ if (++T == GroupSize) {
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+ T = 0;
+ }
+ continue;
+ }
+
+ assert(B < GroupSize && "Array overflow");
+ BCS[B].Number = Number;
+
+ // Interference for the live-in value.
+ if (Intf.first() <= Indexes->getMBBStartIdx(Number))
+ BCS[B].Entry = SpillPlacement::MustSpill;
+ else
+ BCS[B].Entry = SpillPlacement::PrefSpill;
+
+ // Interference for the live-out value.
+ if (Intf.last() >= SA->getLastSplitPoint(Number))
+ BCS[B].Exit = SpillPlacement::MustSpill;
+ else
+ BCS[B].Exit = SpillPlacement::PrefSpill;
+
+ if (++B == GroupSize) {
+ SpillPlacer->addConstraints(makeArrayRef(BCS, B));
+ B = 0;
+ }
+ }
+
+ SpillPlacer->addConstraints(makeArrayRef(BCS, B));
+ SpillPlacer->addLinks(makeArrayRef(TBS, T));
+}
+
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
+ // Keep track of through blocks that have not been added to SpillPlacer.
+ BitVector Todo = SA->getThroughBlocks();
+ SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
+ unsigned AddedTo = 0;
+#ifndef NDEBUG
+ unsigned Visited = 0;
+#endif
+
+ for (;;) {
+ ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
+ // Find new through blocks in the periphery of PrefRegBundles.
+ for (int i = 0, e = NewBundles.size(); i != e; ++i) {
+ unsigned Bundle = NewBundles[i];
+ // Look at all blocks connected to Bundle in the full graph.
+ ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ unsigned Block = *I;
+ if (!Todo.test(Block))
+ continue;
+ Todo.reset(Block);
+ // This is a new through block. Add it to SpillPlacer later.
+ ActiveBlocks.push_back(Block);
+#ifndef NDEBUG
+ ++Visited;
+#endif
+ }
+ }
+ // Any new blocks to add?
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+
+ // Compute through constraints from the interference, or assume that all
+ // through blocks prefer spilling when forming compact regions.
+ auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
+ if (Cand.PhysReg)
+ addThroughConstraints(Cand.Intf, NewBlocks);
+ else
+ // Provide a strong negative bias on through blocks to prevent unwanted
+ // liveness on loop backedges.
+ SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
+ AddedTo = ActiveBlocks.size();
+
+ // Perhaps iterating can enable more bundles?
+ SpillPlacer->iterate();
+ }
+ DEBUG(dbgs() << ", v=" << Visited);
+}
+
+/// calcCompactRegion - Compute the set of edge bundles that should be live
+/// when splitting the current live range into compact regions. Compact
+/// regions can be computed without looking at interference. They are the
+/// regions formed by removing all the live-through blocks from the live range.
+///
+/// Returns false if the current live range is already compact, or if the
+/// compact regions would form single block regions anyway.
+bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
+ // Without any through blocks, the live range is already compact.
+ if (!SA->getNumThroughBlocks())
+ return false;
+
+ // Compact regions don't correspond to any physreg.
+ Cand.reset(IntfCache, 0);
+
+ DEBUG(dbgs() << "Compact region bundles");
+
+ // Use the spill placer to determine the live bundles. GrowRegion pretends
+ // that all the through blocks have interference when PhysReg is unset.
+ SpillPlacer->prepare(Cand.LiveBundles);
+
+ // The static split cost will be zero since Cand.Intf reports no interference.
+ BlockFrequency Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ growRegion(Cand);
+ SpillPlacer->finish();
+
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << ", none.\n");
+ return false;
+ }
+
+ DEBUG({
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ return true;
+}
+
+/// calcSpillCost - Compute how expensive it would be to split the live range in
+/// SA around all use blocks instead of forming bundle regions.
+BlockFrequency RAGreedy::calcSpillCost() {
+ BlockFrequency Cost = 0;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ // We normally only need one spill instruction - a load or a store.
+ Cost += SpillPlacer->getBlockFrequency(Number);
+
+ // Unless the value is redefined in the block.
+ if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
+ Cost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return Cost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
+ BlockFrequency GlobalCost = 0;
+ const BitVector &LiveBundles = Cand.LiveBundles;
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+ unsigned Ins = 0;
+
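+ // One spill code instruction is needed at a block boundary whenever the
+ // chosen bundle placement (RegIn/RegOut) disagrees with the preference
+ // recorded in SplitConstraints.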
+ if (BI.LiveIn)
+ Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+ if (BI.LiveOut)
+ Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+ while (Ins--)
+ GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
+ }
+
+ for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
+ unsigned Number = Cand.ActiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
+ if (!RegIn && !RegOut)
+ continue;
+ if (RegIn && RegOut) {
+ // We need double spill code if this block has interference.
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference()) {
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ continue;
+ }
+ // live-in / stack-out or stack-in live-out.
+ GlobalCost += SpillPlacer->getBlockFrequency(Number);
+ }
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split the current live range around the regions
+/// determined by BundleCand and GlobalCand.
+///
+/// Before calling this function, GlobalCand and BundleCand must be initialized
+/// so each bundle is assigned to a valid candidate, or NoCand for the
+/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
+/// objects must be initialized for the current live range, and intervals
+/// created for the used candidates.
+///
+/// @param LREdit The LiveRangeEdit object handling the current split.
+/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
+/// must appear in this list.
+void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
+ ArrayRef<unsigned> UsedCands) {
+ // These are the intervals created for new global ranges. We may create more
+ // intervals for local ranges.
+ const unsigned NumGlobalIntvs = LREdit.size();
+ DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
+ assert(NumGlobalIntvs && "No global intervals configured");
+
+ // Isolate even single instructions when dealing with a proper sub-class.
+ // That guarantees register class inflation for the stack interval because it
+ // is all copies.
+ unsigned Reg = SA->getParent().reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+
+ // First handle all the blocks with uses.
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ unsigned Number = BI.MBB->getNumber();
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+ if (BI.LiveIn) {
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+ }
+ if (BI.LiveOut) {
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ }
+
+ // Create separate intervals for isolated blocks with multiple uses.
+ if (!IntvIn && !IntvOut) {
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ continue;
+ }
+
+ if (IntvIn && IntvOut)
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ else if (IntvIn)
+ SE->splitRegInBlock(BI, IntvIn, IntfIn);
+ else
+ SE->splitRegOutBlock(BI, IntvOut, IntfOut);
+ }
+
+ // Handle live-through blocks. The relevant live-through blocks are stored in
+ // the ActiveBlocks list with each candidate. We need to filter out
+ // duplicates.
+ BitVector Todo = SA->getThroughBlocks();
+ for (unsigned c = 0; c != UsedCands.size(); ++c) {
+ ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ unsigned Number = Blocks[i];
+ if (!Todo.test(Number))
+ continue;
+ Todo.reset(Number);
+
+ unsigned IntvIn = 0, IntvOut = 0;
+ SlotIndex IntfIn, IntfOut;
+
+ unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
+ if (CandIn != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandIn];
+ IntvIn = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfIn = Cand.Intf.first();
+ }
+
+ unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
+ if (CandOut != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[CandOut];
+ IntvOut = Cand.IntvIdx;
+ Cand.Intf.moveToBlock(Number);
+ IntfOut = Cand.Intf.last();
+ }
+ if (!IntvIn && !IntvOut)
+ continue;
+ SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
+ }
+ }
+
+ ++NumGlobalSplits;
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ unsigned OrigBlocks = SA->getNumLiveBlocks();
+
+ // Sort out the new intervals created by splitting. We get four kinds:
+ // - Remainder intervals should not be split again.
+ // - Candidate intervals can be assigned to Cand.PhysReg.
+ // - Block-local splits are candidates for local splitting.
+ // - DCE leftovers should go back on the queue.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
+
+ // Ignore old intervals from DCE.
+ if (getStage(Reg) != RS_New)
+ continue;
+
+ // Remainder interval. Don't try splitting again, spill if it doesn't
+ // allocate.
+ if (IntvMap[i] == 0) {
+ setStage(Reg, RS_Spill);
+ continue;
+ }
+
+ // Global intervals. Allow repeated splitting as long as the number of live
+ // blocks is strictly decreasing.
+ if (IntvMap[i] < NumGlobalIntvs) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
+ DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
+ << " blocks as original.\n");
+ // Don't allow repeated splitting as a safeguard against looping.
+ setStage(Reg, RS_Split2);
+ }
+ continue;
+ }
+
+ // Other intervals are treated as new. This includes local intervals created
+ // for blocks with multiple uses, and anything created by DCE.
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around region");
+}
+
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ unsigned NumCands = 0;
+ BlockFrequency BestCost;
+
+ // Check if we can split this live range around a compact region.
+ bool HasCompact = calcCompactRegion(GlobalCand.front());
+ if (HasCompact) {
+ // Yes, keep GlobalCand[0] as the compact region candidate.
+ NumCands = 1;
+ BestCost = BlockFrequency::getMaxFrequency();
+ } else {
+ // No benefit from the compact region, our fallback will be per-block
+ // splitting. Make sure we find a solution that is cheaper than spilling.
+ BestCost = calcSpillCost();
+ DEBUG(dbgs() << "Cost of isolating all blocks = ";
+ MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
+ }
+
+ unsigned BestCand =
+ calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
+ false/*IgnoreCSR*/);
+
+ // No solutions found, fall back to single block splitting.
+ if (!HasCompact && BestCand == NoCand)
+ return 0;
+
+ return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
+}
+
+unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ BlockFrequency &BestCost,
+ unsigned &NumCands,
+ bool IgnoreCSR) {
+ unsigned BestCand = NoCand;
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
+ continue;
+
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand || !GlobalCand[i].PhysReg)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount) {
+ Worst = i;
+ WorstCount = Count;
+ }
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
+
+ SpillPlacer->prepare(Cand.LiveBundles);
+ BlockFrequency Cost;
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
+ continue;
+ }
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
+ MBFI->printBlockFreq(dbgs(), Cost));
+ if (Cost >= BestCost) {
+ DEBUG({
+ if (BestCand == NoCand)
+ dbgs() << " worse than no bundles\n";
+ else
+ dbgs() << " worse than "
+ << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
+ });
+ continue;
+ }
+ growRegion(Cand);
+
+ SpillPlacer->finish();
+
+ // No live bundles, defer to splitSingleBlocks().
+ if (!Cand.LiveBundles.any()) {
+ DEBUG(dbgs() << " no bundles.\n");
+ continue;
+ }
+
+ Cost += calcGlobalSplitCost(Cand);
+ DEBUG({
+ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
+ << " with bundles";
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+ if (Cost < BestCost) {
+ BestCand = NumCands;
+ BestCost = Cost;
+ }
+ ++NumCands;
+ }
+ return BestCand;
+}
+
+unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ bool HasCompact,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVector<unsigned, 8> UsedCands;
+ // Prepare split editor.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit, SplitSpillMode);
+
+ // Assign all edge bundles to the preferred candidate, or NoCand.
+ BundleCand.assign(Bundles->getNumBundles(), NoCand);
+
+ // Assign bundles for the best candidate region.
+ if (BestCand != NoCand) {
+ GlobalSplitCandidate &Cand = GlobalCand[BestCand];
+ if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
+ UsedCands.push_back(BestCand);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
+ << B << " bundles, intv " << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ // Assign bundles for the compact region.
+ if (HasCompact) {
+ GlobalSplitCandidate &Cand = GlobalCand.front();
+ assert(!Cand.PhysReg && "Compact region has no physreg");
+ if (unsigned B = Cand.getBundles(BundleCand, 0)) {
+ UsedCands.push_back(0);
+ Cand.IntvIdx = SE->openIntv();
+ DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
+ << Cand.IntvIdx << ".\n");
+ (void)B;
+ }
+ }
+
+ splitAroundRegion(LREdit, UsedCands);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// tryBlockSplit - Split a global live range around every block with uses. This
+/// creates a lot of local live ranges that will be split by tryLocalSplit if
+/// they don't allocate.
+unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
+ unsigned Reg = VirtReg.reg;
+ bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit, SplitSpillMode);
+ ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
+ for (unsigned i = 0; i != UseBlocks.size(); ++i) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
+ SE->splitSingleBlock(BI);
+ }
+ // No blocks were split.
+ if (LREdit.empty())
+ return 0;
+
+ // We did split for some blocks.
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+
+ // Tell LiveDebugVariables about the new ranges.
+ DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
+
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Sort out the new intervals created by splitting. The remainder interval
+ // goes straight to spilling, the new local ranges get to stay RS_New.
+ for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
+ LiveInterval &LI = LIS->getInterval(LREdit.get(i));
+ if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ setStage(LI, RS_Spill);
+ }
+
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Per-Instruction Splitting
+//===----------------------------------------------------------------------===//
+
+/// Get the number of allocatable registers that match the constraints of \p Reg
+/// on \p MI and that are also in \p SuperRC.
+static unsigned getNumAllocatableRegsForConstraints(
+ const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
+ const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
+ const RegisterClassInfo &RCI) {
+ assert(SuperRC && "Invalid register class");
+
+ const TargetRegisterClass *ConstrainedRC =
+ MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
+ /* ExploreBundle */ true);
+ if (!ConstrainedRC)
+ return 0;
+ return RCI.getNumAllocatableRegs(ConstrainedRC);
+}
+
+/// tryInstructionSplit - Split a live range around individual instructions.
+/// This is normally not worthwhile since the spiller is doing essentially the
+/// same thing. However, when the live range is in a constrained register
+/// class, it may help to insert copies such that parts of the live range can
+/// be moved to a larger register class.
+///
+/// This is similar to spilling to a larger register class.
+unsigned
+RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+ // There is no point to this if there are no larger sub-classes.
+ if (!RegClassInfo.isProperSubClass(CurRC))
+ return 0;
+
+ // Always enable split spill mode, since we're effectively spilling to a
+ // register.
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit, SplitEditor::SM_Size);
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 1)
+ return 0;
+
+ DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
+
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(CurRC, *MF);
+ unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
+ // Split around every non-copy instruction if this split will relax
+ // the constraints on the virtual register.
+ // Otherwise, splitting just inserts uncoalescable copies that do not help
+ // the allocation.
+ for (unsigned i = 0; i != Uses.size(); ++i) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ if (MI->isFullCopy() ||
+ SuperRCNumAllocatableRegs ==
+ getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
+ TRI, RCI)) {
+ DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ continue;
+ }
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SE->useIntv(SegStart, SegStop);
+ }
+
+ if (LREdit.empty()) {
+ DEBUG(dbgs() << "All uses were copies.\n");
+ return 0;
+ }
+
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+
+ // Assign all new registers to RS_Spill. This was the last chance.
+ setStage(LREdit.begin(), LREdit.end(), RS_Spill);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx =
+ BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
+ SlotIndex StopIdx =
+ BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstInstr to
+ // LastInstr, so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI =
+ Matrix->getLiveUnions()[*Units].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+
+ // Add fixed interference.
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ const LiveRange &LR = LIS->getRegUnit(*Units);
+ LiveRange::const_iterator I = LR.find(StartIdx);
+ LiveRange::const_iterator E = LR.end();
+
+ // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
+ for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
+ while (Uses[Gap+1].getBoundaryIndex() < I->start)
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = llvm::huge_valf;
+ if (Uses[Gap+1].getBaseIndex() >= I->end)
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - A phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case, we simply assume
+ // that the interval is continuous from FirstInstr to LastInstr. We should
+ // make sure that we don't do anything illegal to such an interval, though.
+
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << Uses[i];
+ dbgs() << '\n';
+ });
+
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (Matrix->checkRegMaskInterference(VirtReg)) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+ Uses.front().getRegSlot()) - RMS.begin();
+ unsigned re = RMS.size();
+ for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+ // Look for Uses[i] <= RMS <= Uses[i+1].
+ assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+ if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ break;
+ DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ RegMaskGaps.push_back(i);
+ // Advance ri to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+ ++ri;
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
+ // Since we allow local split results to be split again, there is a risk of
+ // creating infinite loops. It is tempting to require that the new live
+ // ranges have fewer instructions than the original. That would guarantee
+ // convergence, but it is too strict. A live range with 3 instructions can be
+ // split 2+3 (including the COPY), and we want to allow that.
+ //
+ // Instead we use these rules:
+ //
+ // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
+ // noop split, of course).
+ // 2. Require progress be made for ranges with getStage() == RS_Split2. All
+ // the new ranges must have fewer instructions than before the split.
+ // 3. New ranges with the same number of instructions are marked RS_Split2,
+ // smaller ranges are marked RS_New.
+ //
+ // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
+ // excessive splitting and infinite loops.
+ //
+ bool ProgressRequired = getStage(VirtReg) >= RS_Split2;
+
+ // Best split candidate.
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq =
+ SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
+ (1.0f / MBFI->getEntryFreq());
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Remove any gaps with regmask clobbers.
+ if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
+ for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+ GapWeight[RegMaskGaps[i]] = llvm::huge_valf;
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+
+ // How many gaps would the new range have?
+ unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
+
+ // Legally, without causing looping?
+ bool Legal = !ProgressRequired || NewGaps < NumGaps;
+
+ if (Legal && MaxGap < llvm::huge_valf) {
+ // Estimate the new spill weight. Each instruction reads or writes the
+ // register. Conservatively assume there are no read-modify-write
+ // instructions.
+ //
+ // Try to guess the size of the new interval.
+ const float EstWeight = normalizeSpillWeight(
+ blockFreq * (NewGaps + 1),
+ Uses[SplitBefore].distance(Uses[SplitAfter]) +
+ (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
+ 1);
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ DEBUG(dbgs() << " w=" << EstWeight);
+ if (EstWeight * Hysteresis >= MaxGap) {
+ Shrink = false;
+ float Diff = EstWeight - MaxGap;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Hysteresis * Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ if (++SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ SE->reset(LREdit);
+
+ SE->openIntv();
+ SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
+ SE->useIntv(SegStart, SegStop);
+ SmallVector<unsigned, 8> IntvMap;
+ SE->finish(&IntvMap);
+ DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
+
+ // If the new range has the same number of instructions as before, mark it as
+ // RS_Split2 so the next split will be forced to make progress. Otherwise,
+ // leave the new intervals as RS_New so they can compete.
+ bool LiveBefore = BestBefore != 0 || BI.LiveIn;
+ bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
+ unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
+ if (NewGaps >= NumGaps) {
+ DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ assert(!ProgressRequired && "Didn't make progress when it was required.");
+ for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
+ if (IntvMap[i] == 1) {
+ setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
+ DEBUG(dbgs() << PrintReg(LREdit.get(i)));
+ }
+ DEBUG(dbgs() << '\n');
+ }
+ ++NumLocalSplits;
+
+ return 0;
+}
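// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): a
// brute-force version of the window search that tryLocalSplit performs
// incrementally above.  GapWeight, the block frequency and the weight
// estimate are made-up stand-ins for the real data structures; the point is
// only the criterion "the estimated new spill weight must beat the largest
// interference weight inside the chosen window of gaps".
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // One interference weight per gap between consecutive uses of the interval.
  std::vector<float> GapWeight = {0.5f, 4.0f, 0.25f, 0.25f, 3.0f};
  const float BlockFreq = 8.0f; // assumed frequency of the single basic block

  float BestDiff = 0;
  unsigned BestBefore = 0, BestAfter = 0;
  for (unsigned B = 0; B < GapWeight.size(); ++B) {
    for (unsigned A = B + 1; A <= GapWeight.size(); ++A) {
      // Largest interference weight inside the candidate window of gaps [B, A).
      float MaxGap = *std::max_element(GapWeight.begin() + B,
                                       GapWeight.begin() + A);
      // Crude stand-in for normalizeSpillWeight: wider windows cost more.
      float EstWeight = BlockFreq / float(A - B + 1);
      if (EstWeight > MaxGap && EstWeight - MaxGap > BestDiff) {
        BestDiff = EstWeight - MaxGap;
        BestBefore = B;
        BestAfter = A;
      }
    }
  }
  std::printf("best window: gaps [%u, %u), diff %.2f\n",
              BestBefore, BestAfter, BestDiff);
}
// ---------------------------------------------------------------------------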
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ // Ranges must be Split2 or less.
+ if (getStage(VirtReg) >= RS_Spill)
+ return 0;
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ SA->analyze(&VirtReg);
+ unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ return tryInstructionSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ SA->analyze(&VirtReg);
+
+ // FIXME: SplitAnalysis may repair broken live ranges coming from the
+ // coalescer. That may cause the range to become allocatable which means that
+ // tryRegionSplit won't be making progress. This check should be replaced with
+ // an assertion when the coalescer is fixed.
+ if (SA->didRepairRange()) {
+ // VirtReg has changed, so all cached queries are invalid.
+ Matrix->invalidateVirtRegs();
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ return PhysReg;
+ }
+
+ // First try to split around a region spanning multiple blocks. RS_Split2
+ // ranges already made dubious progress with region splitting, so they go
+ // straight to single block splitting.
+ if (getStage(VirtReg) < RS_Split2) {
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+ }
+
+ // Then isolate blocks.
+ return tryBlockSplit(VirtReg, Order, NewVRegs);
+}
+
+//===----------------------------------------------------------------------===//
+// Last Chance Recoloring
+//===----------------------------------------------------------------------===//
+
+/// mayRecolorAllInterferences - Check if the virtual registers that
+/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
+/// recolored to free \p PhysReg.
+/// When true is returned, \p RecoloringCandidates has been augmented with all
+/// the live intervals that need to be recolored in order to free \p PhysReg
+/// for \p VirtReg.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+bool
+RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ // If there are LastChanceRecoloringMaxInterference or more interferences,
+ // chances are one of them would not be recolorable.
+ if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
+ LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
+ DEBUG(dbgs() << "Early abort: too many interferences.\n");
+ CutOffInfo |= CO_Interf;
+ return false;
+ }
+ for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ // If Intf is done and sits in the same register class as VirtReg,
+ // it would not be recolorable as it is in the same state as VirtReg.
+ if ((getStage(*Intf) == RS_Done &&
+ MRI->getRegClass(Intf->reg) == CurRC) ||
+ FixedRegisters.count(Intf->reg)) {
+ DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
+ return false;
+ }
+ RecoloringCandidates.insert(Intf);
+ }
+ }
+ return true;
+}
+
+/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
+/// its interferences.
+/// Last chance recoloring chooses a color for \p VirtReg and recolors every
+/// virtual register that was using it. The recoloring process may recursively
+/// use the last chance recoloring. Therefore, when a virtual register has been
+/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
+/// be last-chance-recolored again during this recoloring "session".
+/// E.g.,
+/// Let
+/// vA can use {R1, R2 }
+/// vB can use { R2, R3}
+/// vC can use {R1 }
+/// Where vA, vB, and vC cannot be split anymore (they are reloads for
+/// instance) and they all interfere.
+///
+/// vA is assigned R1
+/// vB is assigned R2
+/// vC tries to evict vA but vA is already done.
+/// Regular register allocation fails.
+///
+/// Last chance recoloring kicks in:
+/// vC does as if vA was evicted => vC uses R1.
+/// vC is marked as fixed.
+/// vA needs to find a color.
+/// None are available.
+/// vA cannot evict vC: vC is a fixed virtual register now.
+/// vA does as if vB was evicted => vA uses R2.
+/// vB needs to find a color.
+/// R3 is available.
+/// Recoloring => vC = R1, vA = R2, vB = R3
+///
+/// \p Order defines the preferred allocation order for \p VirtReg.
+/// \p NewRegs will contain any new virtual register that have been created
+/// (split, spill) during the process and that must be assigned.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+/// \p Depth gives the current depth of the last chance recoloring.
+/// \return a physical register that can be used for VirtReg or ~0u if none
+/// exists.
+unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
+ DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+ // Ranges must be Done.
+ assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
+ "Last chance recoloring should really be last chance");
+ // Set the max depth to LastChanceRecoloringMaxDepth.
+ // We may want to reconsider that if we end up with a too-large search space
+ // for targets with hundreds of registers.
+ // Indeed, in that case we may want to cut the search space earlier.
+ if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
+ DEBUG(dbgs() << "Abort because max depth has been reached.\n");
+ CutOffInfo |= CO_Depth;
+ return ~0u;
+ }
+
+ // Set of Live intervals that will need to be recolored.
+ SmallLISet RecoloringCandidates;
+ // Record the original mapping from virtual register to physical register in
+ // case the recoloring fails.
+ DenseMap<unsigned, unsigned> VirtRegToPhysReg;
+ // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
+ // this recoloring "session".
+ FixedRegisters.insert(VirtReg.reg);
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
+ << PrintReg(PhysReg, TRI) << '\n');
+ RecoloringCandidates.clear();
+ VirtRegToPhysReg.clear();
+
+ // It is only possible to recolor virtual register interference.
+ if (Matrix->checkInterference(VirtReg, PhysReg) >
+ LiveRegMatrix::IK_VirtReg) {
+ DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");
+
+ continue;
+ }
+
+ // Give up early on this PhysReg if it is obvious we cannot recolor all
+ // the interferences.
+ if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
+ FixedRegisters)) {
+ DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
+ continue;
+ }
+
+ // RecoloringCandidates contains all the virtual registers that interfere
+ // with VirtReg on PhysReg (or one of its aliases).
+ // Enqueue them for recoloring and perform the actual recoloring.
+ PQueue RecoloringQueue;
+ for (SmallLISet::iterator It = RecoloringCandidates.begin(),
+ EndIt = RecoloringCandidates.end();
+ It != EndIt; ++It) {
+ unsigned ItVirtReg = (*It)->reg;
+ enqueue(RecoloringQueue, *It);
+ assert(VRM->hasPhys(ItVirtReg) &&
+ "Interferences are supposed to be with allocated variables");
+
+ // Record the current allocation.
+ VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
+ // unset the related struct.
+ Matrix->unassign(**It);
+ }
+
+ // Pretend VirtReg is assigned to PhysReg so that the underlying
+ // recoloring has the right information about the interferences and
+ // available colors.
+ Matrix->assign(VirtReg, PhysReg);
+
+ // Save the current recoloring state.
+ // If we cannot recolor all the interferences, we will have to start again
+ // at this point for the next physical register.
+ SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
+ if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
+ Depth)) {
+ // Do not mess up the global assignment process.
+ // I.e., VirtReg must be unassigned.
+ Matrix->unassign(VirtReg);
+ return PhysReg;
+ }
+
+ DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
+ << PrintReg(PhysReg, TRI) << '\n');
+
+ // The recoloring attempt failed, undo the changes.
+ FixedRegisters = SaveFixedRegisters;
+ Matrix->unassign(VirtReg);
+
+ for (SmallLISet::iterator It = RecoloringCandidates.begin(),
+ EndIt = RecoloringCandidates.end();
+ It != EndIt; ++It) {
+ unsigned ItVirtReg = (*It)->reg;
+ if (VRM->hasPhys(ItVirtReg))
+ Matrix->unassign(**It);
+ unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg];
+ Matrix->assign(**It, ItPhysReg);
+ }
+ }
+
+ // Last chance recoloring did not work either; give up.
+ return ~0u;
+}
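// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// vA/vB/vC example described in the tryLastChanceRecoloring comment above,
// solved with the same "fix one vreg, recursively recolor its interferences,
// undo on failure" idea.  Allowed sets, the interference relation and the
// initial assignment are made up to match that example.
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using Reg = std::string;

static std::map<Reg, std::vector<int>> Allowed = {
    {"vA", {1, 2}}, {"vB", {2, 3}}, {"vC", {1}}};
static std::map<Reg, int> Assignment; // vreg -> physreg, 0 = unassigned

// In this tiny example all three vregs mutually interfere.
static bool interferes(const Reg &A, const Reg &B) { return A != B; }

static bool recolor(const Reg &VReg, std::set<Reg> Fixed) {
  Fixed.insert(VReg);
  for (int PhysReg : Allowed[VReg]) {
    // Collect the interfering vregs currently holding PhysReg.
    std::vector<Reg> Evicted;
    bool Blocked = false;
    for (const auto &Entry : Assignment) {
      const Reg &Other = Entry.first;
      if (Other != VReg && Entry.second == PhysReg && interferes(VReg, Other)) {
        if (Fixed.count(Other)) { Blocked = true; break; }
        Evicted.push_back(Other);
      }
    }
    if (Blocked)
      continue;                            // a fixed vreg owns PhysReg, try next
    std::map<Reg, int> Saved = Assignment; // snapshot for backtracking
    for (const Reg &E : Evicted)
      Assignment[E] = 0;
    Assignment[VReg] = PhysReg;            // "do as if" the others were evicted
    bool OK = true;
    for (const Reg &E : Evicted)
      if (!recolor(E, Fixed)) { OK = false; break; }
    if (OK)
      return true;
    Assignment = Saved;                    // recoloring failed, undo and retry
  }
  return false;
}

int main() {
  Assignment = {{"vA", 1}, {"vB", 2}, {"vC", 0}}; // vC failed regular allocation
  if (recolor("vC", {}))
    for (const auto &Entry : Assignment)
      std::printf("%s -> R%d\n", Entry.first.c_str(), Entry.second);
  // Prints vA -> R2, vB -> R3, vC -> R1, matching the comment's result.
}
// ---------------------------------------------------------------------------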
+
+/// tryRecoloringCandidates - Try to assign a new color to every register
+/// in \p RecoloringQueue.
+/// \p NewRegs will contain any new virtual register created during the
+/// recoloring process.
+/// \p FixedRegisters[in/out] contains all the registers that have been
+/// recolored.
+/// \return true if all virtual registers in RecoloringQueue were successfully
+/// recolored, false otherwise.
+bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
+ while (!RecoloringQueue.empty()) {
+ LiveInterval *LI = dequeue(RecoloringQueue);
+ DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
+ unsigned PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
+ if (PhysReg == ~0u || !PhysReg)
+ return false;
+ DEBUG(dbgs() << "Recoloring of " << *LI
+ << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+ Matrix->assign(*LI, PhysReg);
+ FixedRegisters.insert(LI->reg);
+ }
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ CutOffInfo = CO_None;
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ SmallVirtRegSet FixedRegisters;
+ unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ if (Reg == ~0U && (CutOffInfo != CO_None)) {
+ uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
+ if (CutOffEncountered == CO_Depth)
+ Ctx.emitError("register allocation failed: maximum depth for recoloring "
+ "reached. Use -fexhaustive-register-search to skip "
+ "cutoffs");
+ else if (CutOffEncountered == CO_Interf)
+ Ctx.emitError("register allocation failed: maximum interference for "
+ "recoloring reached. Use -fexhaustive-register-search "
+ "to skip cutoffs");
+ else if (CutOffEncountered == (CO_Depth | CO_Interf))
+ Ctx.emitError("register allocation failed: maximum interference and "
+ "depth for recoloring reached. Use "
+ "-fexhaustive-register-search to skip cutoffs");
+ }
+ return Reg;
+}
+
+/// Using a CSR for the first time has a cost because it causes push|pop
+/// to be added to the prologue|epilogue. Splitting a cold section of the live
+/// range, or spilling the live range in the cold path, can have a lower cost
+/// than using the CSR for the first time. Returns the physical register if we
+/// decide to use the CSR; otherwise returns 0.
+unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ unsigned PhysReg,
+ unsigned &CostPerUseLimit,
+ SmallVectorImpl<unsigned> &NewVRegs) {
+ if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
+ // We choose spill over using the CSR for the first time if the spill cost
+ // is lower than CSRCost.
+ SA->analyze(&VirtReg);
+ if (calcSpillCost() >= CSRCost)
+ return PhysReg;
+
+ // We are going to spill, set CostPerUseLimit to 1 to make sure that
+ // we will not use a callee-saved register in tryEvict.
+ CostPerUseLimit = 1;
+ return 0;
+ }
+ if (getStage(VirtReg) < RS_Split) {
+ // We choose pre-splitting over using the CSR for the first time if
+ // the cost of splitting is lower than CSRCost.
+ SA->analyze(&VirtReg);
+ unsigned NumCands = 0;
+ BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, true /*IgnoreCSR*/);
+ if (BestCand == NoCand)
+ // Use the CSR if we can't find a region split below CSRCost.
+ return PhysReg;
+
+ // Perform the actual pre-splitting.
+ doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
+ return 0;
+ }
+ return PhysReg;
+}
+
+void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
+ // Do not keep invalid information around.
+ SetOfBrokenHints.remove(&LI);
+}
+
+void RAGreedy::initializeCSRCost() {
+ // We use the larger of the command-line option and the value reported
+ // by TRI.
+ CSRCost = BlockFrequency(
+ std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
+ if (!CSRCost.getFrequency())
+ return;
+
+ // Raw cost is relative to Entry == 2^14; scale it appropriately.
+ uint64_t ActualEntry = MBFI->getEntryFreq();
+ if (!ActualEntry) {
+ CSRCost = 0;
+ return;
+ }
+ uint64_t FixedEntry = 1 << 14;
+ if (ActualEntry < FixedEntry)
+ CSRCost *= BranchProbability(ActualEntry, FixedEntry);
+ else if (ActualEntry <= UINT32_MAX)
+ // Invert the fraction and divide.
+ CSRCost /= BranchProbability(FixedEntry, ActualEntry);
+ else
+ // Can't use BranchProbability in general, since it takes 32-bit numbers.
+ CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
+}
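// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// rescaling performed by initializeCSRCost, written with plain integers and
// ignoring BranchProbability's rounding.  The raw CSR cost (the larger of the
// CSRFirstTimeCost option and the target's getCSRFirstUseCost value) is
// expressed relative to a fixed entry frequency of 2^14, so it is scaled by
// ActualEntry / 2^14 before being compared against real block frequencies.
// The raw cost of 5 used here is an assumption for the example.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t FixedEntry = 1 << 14; // 16384, the reference entry frequency
  const uint64_t RawCSRCost = 5;       // assumed raw CSR first-use cost

  for (uint64_t ActualEntry : {uint64_t(4096), uint64_t(65536)}) {
    // A smaller ActualEntry scales the cost down, a larger one scales it up;
    // both branches of initializeCSRCost reduce to Raw * Actual / Fixed.
    uint64_t Scaled = RawCSRCost * ActualEntry / FixedEntry;
    std::printf("entry freq %llu -> CSR cost %llu\n",
                (unsigned long long)ActualEntry, (unsigned long long)Scaled);
  }
}
// ---------------------------------------------------------------------------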
+
+/// \brief Collect the hint info for \p Reg.
+/// The results are stored into \p Out.
+/// \p Out is not cleared before being populated.
+void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
+ for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
+ if (!Instr.isFullCopy())
+ continue;
+ // Look for the other end of the copy.
+ unsigned OtherReg = Instr.getOperand(0).getReg();
+ if (OtherReg == Reg) {
+ OtherReg = Instr.getOperand(1).getReg();
+ if (OtherReg == Reg)
+ continue;
+ }
+ // Get the current assignment.
+ unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ ? OtherReg
+ : VRM->getPhys(OtherReg);
+ // Push the collected information.
+ Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
+ OtherPhysReg));
+ }
+}
+
+/// \brief Using the given \p List, compute the cost of the broken hints if
+/// \p PhysReg was used.
+/// \return The cost of \p List for \p PhysReg.
+BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
+ unsigned PhysReg) {
+ BlockFrequency Cost = 0;
+ for (const HintInfo &Info : List) {
+ if (Info.PhysReg != PhysReg)
+ Cost += Info.Freq;
+ }
+ return Cost;
+}
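// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): what
// getBrokenHintFreq computes for a small, made-up list of copy hints.  A hint
// is "broken" when the other end of the copy lives in a different physical
// register, and its cost is the block frequency of the copy instruction.
#include <cstdio>
#include <vector>

struct HintInfo {
  unsigned Freq;    // block frequency of the copy instruction
  unsigned PhysReg; // physreg currently used by the other end of the copy
};

unsigned brokenHintFreq(const std::vector<HintInfo> &List, unsigned PhysReg) {
  unsigned Cost = 0;
  for (const HintInfo &Info : List)
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq; // this copy stays a real copy, pay its frequency
  return Cost;
}

int main() {
  std::vector<HintInfo> Hints = {{16, 1}, {4, 2}, {1, 1}};
  // Keeping R2 breaks the two copies hinted to R1: cost 16 + 1 = 17.
  std::printf("cost if assigned R2: %u\n", brokenHintFreq(Hints, 2));
  // Switching to R1 breaks only the copy hinted to R2: cost 4.
  std::printf("cost if assigned R1: %u\n", brokenHintFreq(Hints, 1));
}
// ---------------------------------------------------------------------------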
+
+/// \brief Using the register assigned to \p VirtReg, try to recolor
+/// all the live ranges that are copy-related with \p VirtReg.
+/// The recoloring is then propagated to all the live-ranges that have
+/// been recolored and so on, until no more copies can be coalesced or
+/// it is not profitable.
+/// For a given live range, profitability is determined by the sum of the
+/// frequencies of the non-identity copies it would introduce with the old
+/// and new register.
+void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
+ // We have a broken hint, check if it is possible to fix it by
+ // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
+ // some register and PhysReg may be available for the other live-ranges.
+ SmallSet<unsigned, 4> Visited;
+ SmallVector<unsigned, 2> RecoloringCandidates;
+ HintsInfo Info;
+ unsigned Reg = VirtReg.reg;
+ unsigned PhysReg = VRM->getPhys(Reg);
+ // Start the recoloring algorithm from the input live-interval, then
+ // it will propagate to the ones that are copy-related with it.
+ Visited.insert(Reg);
+ RecoloringCandidates.push_back(Reg);
+
+ DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
+ << PrintReg(PhysReg, TRI) << ")\n");
+
+ do {
+ Reg = RecoloringCandidates.pop_back_val();
+
+ // We cannot recolor physical registers.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
+
+ // Get the live interval mapped with this virtual register to be able
+ // to check for the interference with the new color.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ unsigned CurrPhys = VRM->getPhys(Reg);
+ // Check that the new color matches the register class constraints and
+ // that it is free for this live range.
+ if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
+ Matrix->checkInterference(LI, PhysReg)))
+ continue;
+
+ DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
+ << ") is recolorable.\n");
+
+ // Gather the hint info.
+ Info.clear();
+ collectHintInfo(Reg, Info);
+ // Check if recoloring the live-range will increase the cost of the
+ // non-identity copies.
+ if (CurrPhys != PhysReg) {
+ DEBUG(dbgs() << "Checking profitability:\n");
+ BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
+ BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
+ DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
+ << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
+ if (OldCopiesCost < NewCopiesCost) {
+ DEBUG(dbgs() << "=> Not profitable.\n");
+ continue;
+ }
+ // At this point, the new cost is either cheaper or equal. If it is
+ // equal, we consider it profitable because it may expose
+ // more recoloring opportunities.
+ DEBUG(dbgs() << "=> Profitable.\n");
+ // Recolor the live-range.
+ Matrix->unassign(LI);
+ Matrix->assign(LI, PhysReg);
+ }
+ // Push all copy-related live-ranges to keep reconciling the broken
+ // hints.
+ for (const HintInfo &HI : Info) {
+ if (Visited.insert(HI.Reg).second)
+ RecoloringCandidates.push_back(HI.Reg);
+ }
+ } while (!RecoloringCandidates.empty());
+}
+
+/// \brief Try to recolor broken hints.
+/// Broken hints may be repaired by recoloring when an evicted variable
+/// freed up a register for a larger live-range.
+/// Consider the following example:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// ...
+/// = b
+/// = a
+/// Let us assume b gets split:
+/// BB1:
+/// a =
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// = a
+/// Because of how the allocation works, b, c, and d may be assigned different
+/// colors. Now, if a gets evicted later:
+/// BB1:
+/// a =
+/// st a, SpillSlot
+/// b =
+/// BB2:
+/// c = b
+/// ...
+/// d = c
+/// = d
+/// e = ld SpillSlot
+/// = e
+/// It is likely that we can assign the same register to b, c, and d,
+/// getting rid of 2 copies.
+void RAGreedy::tryHintsRecoloring() {
+ for (LiveInterval *LI : SetOfBrokenHints) {
+ assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
+ "Recoloring is possible only for virtual registers");
+ // Some dead defs may be around (e.g., because of debug uses).
+ // Ignore those.
+ if (!VRM->hasPhys(LI->reg))
+ continue;
+ tryHintRecoloring(*LI);
+ }
+}
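// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// worklist propagation performed by tryHintRecoloring, run on made-up data
// shaped like the b/c/d example in the comment above.  Copy edges carry the
// frequency of the copy; the interference check against the target physreg is
// assumed to always succeed in this sketch.
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

struct Copy { std::string Other; unsigned Freq; };

static std::map<std::string, unsigned> Phys = {{"b", 1}, {"c", 2}, {"d", 3}};
static std::map<std::string, std::vector<Copy>> Copies = {
    {"b", {{"c", 8}}}, {"c", {{"b", 8}, {"d", 8}}}, {"d", {{"c", 8}}}};

static unsigned brokenCost(const std::string &R, unsigned P) {
  unsigned Cost = 0;
  for (const Copy &C : Copies[R])
    if (Phys[C.Other] != P)
      Cost += C.Freq; // a copy to/from a different physreg stays a real copy
  return Cost;
}

int main() {
  const unsigned Target = 1;    // "b" holds R1; try to pull c and d along
  std::set<std::string> Visited = {"b"};
  std::vector<std::string> Worklist = {"b"};
  while (!Worklist.empty()) {
    std::string R = Worklist.back();
    Worklist.pop_back();
    if (Phys[R] != Target) {
      // Recolor R only when it does not make its copies more expensive.
      if (brokenCost(R, Target) > brokenCost(R, Phys[R]))
        continue;               // not profitable, stop propagating through R
      Phys[R] = Target;
    }
    for (const Copy &C : Copies[R])
      if (Visited.insert(C.Other).second)
        Worklist.push_back(C.Other);
  }
  for (const auto &Entry : Phys)
    std::printf("%s -> R%u\n", Entry.first.c_str(), Entry.second);
  // Prints b, c and d all mapped to R1, i.e. the two copies become identity
  // copies and can be removed.
}
// ---------------------------------------------------------------------------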
+
+unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+ SmallVectorImpl<unsigned> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
+ unsigned CostPerUseLimit = ~0u;
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+ // When NewVRegs is not empty, we may have made decisions such as evicting
+ // a virtual register; go with the earlier decisions and use the physical
+ // register.
+ if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
+ NewVRegs.empty()) {
+ unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+ CostPerUseLimit, NewVRegs);
+ if (CSRReg || !NewVRegs.empty())
+ // Return now if we decide to use a CSR or create new vregs due to
+ // pre-splitting.
+ return CSRReg;
+ } else
+ return PhysReg;
+ }
+
+ LiveRangeStage Stage = getStage(VirtReg);
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+
+ // Try to evict a less worthy live range, but only for ranges from the primary
+ // queue. The RS_Split ranges already failed to do this, and they should not
+ // get a second chance until they have been split.
+ if (Stage != RS_Split)
+ if (unsigned PhysReg =
+ tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
+ // If VirtReg has a hint and that hint is broken, record this
+ // virtual register as a recoloring candidate for a broken hint.
+ // Indeed, since we evicted a variable in its neighborhood it is
+ // likely we can at least partially recolor some of the
+ // copy-related live-ranges.
+ if (Hint && Hint != PhysReg)
+ SetOfBrokenHints.insert(&VirtReg);
+ return PhysReg;
+ }
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // The first time we see a live range, don't try to split or spill.
+ // Wait until the second time, when all smaller ranges have been allocated.
+ // This gives a better picture of the interference to split around.
+ if (Stage < RS_Split) {
+ setStage(VirtReg, RS_Split);
+ DEBUG(dbgs() << "wait for second round\n");
+ NewVRegs.push_back(VirtReg.reg);
+ return 0;
+ }
+
+ // If we couldn't allocate a register from spilling, there is probably some
+ // invalid inline assembly. The base class will report it.
+ if (Stage >= RS_Done || !VirtReg.isSpillable())
+ return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
+ Depth);
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
+ // TODO: This is experimental and in particular, we do not model
+ // the live range splitting done by spilling correctly.
+ // We would need a deep integration with the spiller to do the
+ // right thing here. Anyway, that is still good for early testing.
+ setStage(VirtReg, RS_Memory);
+ DEBUG(dbgs() << "Do as if this register is in memory\n");
+ NewVRegs.push_back(VirtReg.reg);
+ } else {
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
+ spiller().spill(LRE);
+ setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+
+ if (VerifyEnabled)
+ MF->verify(this, "After spilling");
+ }
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
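// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// decision ladder that selectOrSplitImpl walks for a single live range.  The
// Stage and Action enums are simplified stand-ins for the RS_* LiveRangeStage
// machinery and the helper calls (tryAssign, tryEvict, trySplit, ...); the
// deferred-spilling path is ignored.
#include <cstdio>

enum class Stage { New, Split, Split2, Spill, Memory, Done };
enum class Action { Assign, Evict, Defer, Recolor, SplitRange, SpillRange };

Action nextAction(Stage S, bool FreeReg, bool CanEvict, bool Spillable) {
  if (FreeReg)
    return Action::Assign;     // a free register is always the cheapest outcome
  if (S != Stage::Split && CanEvict)
    return Action::Evict;      // push a less worthy live range out of the way
  if (S < Stage::Split)
    return Action::Defer;      // first round: wait until smaller ranges are done
  if (S >= Stage::Done || !Spillable)
    return Action::Recolor;    // nothing else left: last chance recoloring
  if (S < Stage::Spill)
    return Action::SplitRange; // splitting around the interference still allowed
  return Action::SpillRange;   // otherwise spill the range itself
}

int main() {
  // A second-round range (RS_Split2 analogue) with no free register and no
  // profitable eviction ends up in the splitter.
  Action A = nextAction(Stage::Split2, false, false, true);
  std::printf("chosen action: %d\n", static_cast<int>(A));
}
// ---------------------------------------------------------------------------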
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: " << mf.getName() << '\n');
+
+ MF = &mf;
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ RCI.runOnMachineFunction(mf);
+
+ EnableLocalReassign = EnableLocalReassignment ||
+ MF->getSubtarget().enableRALocalReassignment(
+ MF->getTarget().getOptLevel());
+
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(),
+ getAnalysis<LiveIntervals>(),
+ getAnalysis<LiveRegMatrix>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+ DebugVars = &getAnalysis<LiveDebugVariables>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ initializeCSRCost();
+
+ calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
+
+ DEBUG(LIS->dump());
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+ SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI));
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
+ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
+ GlobalCand.resize(32); // This will grow as needed.
+ SetOfBrokenHints.clear();
+
+ allocatePhysRegs();
+ tryHintsRecoloring();
+ postOptimization();
+
+ releaseMemory();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 000000000000..d1221ec59bd4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,893 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+// architectures. In Proceedings of the Joint Conference on Languages,
+// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+// NY, USA, 139-148.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "RegisterCoalescer.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <limits>
+#include <memory>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+static RegisterRegAlloc
+RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
+ createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+PBQPCoalescing("pbqp-coalescing",
+ cl::desc("Attempt coalescing during PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<bool>
+PBQPDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(char *cPassID = nullptr)
+ : MachineFunctionPass(ID), customPassID(cPassID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Return the pass name.
+ const char* getPassName() const override {
+ return "PBQP Register Allocator";
+ }
+
+ /// PBQP analysis usage.
+ void getAnalysisUsage(AnalysisUsage &au) const override;
+
+ /// Perform register allocation
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::set<unsigned> RegSet;
+
+ char *customPassID;
+
+ RegSet VRegsToAlloc, EmptyIntervalVRegs;
+
+ /// An instruction which defines an original reg, and whose defs are already
+ /// all dead after remat, is saved in DeadRemats. The deletion of such
+ /// instructions is postponed until all the allocations are done, so their
+ /// remat expressions are always available for the remat of all the siblings
+ /// of the original reg.
+ SmallPtrSet<MachineInstr *, 32> DeadRemats;
+
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
+
+ /// \brief Constructs an initial graph.
+ void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
+
+ /// \brief Spill the given VReg.
+ void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ Spiller &VRegSpiller);
+
+ /// \brief Given a solved PBQP problem, maps the solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller);
+
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM) const;
+
+ void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
+};
+
+char RegAllocPBQP::ID = 0;
+
+/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
+class SpillCosts : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // A minimum spill cost, so that register constraints can be set
+ // without normalization in the [0.0:MinSpillCost) interval.
+ const PBQP::PBQPNum MinSpillCost = 10.0;
+
+ for (auto NId : G.nodeIds()) {
+ PBQP::PBQPNum SpillCost =
+ LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+ if (SpillCost == 0.0)
+ SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
+ else
+ SpillCost += MinSpillCost;
+ PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
+ NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
+ G.setNodeCosts(NId, std::move(NodeCosts));
+ }
+ }
+};
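// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// layout of a PBQP node cost vector as filled in by SpillCosts above (and by
// the callee-saved-register tweak in initializeGraph further below).  Option 0
// is the spill option; options 1..N line up with the node's allowed physregs.
// The interval weight and the CSR choice are assumptions for the example.
#include <cstddef>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  // Per-register costs for the allowed set; assume the third allowed register
  // is a callee-saved register and therefore carries a small extra cost.
  std::vector<double> AllowedRegCost = {0.0, 0.0, 1.0};
  double IntervalWeight = 2.5;        // stand-in for LiveInterval::weight
  const double MinSpillCost = 10.0;

  std::vector<double> NodeCosts(1 + AllowedRegCost.size(), 0.0);
  // The spill option is shifted above MinSpillCost so register constraints can
  // be expressed in [0, MinSpillCost) without renormalizing the vector.
  NodeCosts[0] = IntervalWeight == 0.0
                     ? std::numeric_limits<double>::min()
                     : IntervalWeight + MinSpillCost;
  for (std::size_t I = 0; I != AllowedRegCost.size(); ++I)
    NodeCosts[1 + I] = AllowedRegCost[I];

  for (std::size_t I = 0; I != NodeCosts.size(); ++I)
    std::printf("option %zu cost = %g\n", I, NodeCosts[I]);
}
// ---------------------------------------------------------------------------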
+
+/// @brief Add interference edges between overlapping vregs.
+class Interference : public PBQPRAConstraint {
+private:
+
+ typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
+ typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
+ typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef DenseSet<IKey> DisjointAllowedRegsCache;
+ typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
+ typedef DenseSet<IEdgeKey> IEdgeCache;
+
+ bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ const DisjointAllowedRegsCache &D) const {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ if (NRegs == MRegs)
+ return false;
+
+ if (NRegs < MRegs)
+ return D.count(IKey(NRegs, MRegs)) > 0;
+
+ return D.count(IKey(MRegs, NRegs)) > 0;
+ }
+
+ void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ DisjointAllowedRegsCache &D) {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
+
+ if (NRegs < MRegs)
+ D.insert(IKey(NRegs, MRegs));
+ else
+ D.insert(IKey(MRegs, NRegs));
+ }
+
+ // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
+ // for the fast interference graph construction algorithm. The last is there
+ // to save us from looking up node ids via the VRegToNode map in the graph
+ // metadata.
+ typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
+ IntervalInfo;
+
+ static SlotIndex getStartPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].start;
+ }
+
+ static SlotIndex getEndPoint(const IntervalInfo &I) {
+ return std::get<0>(I)->segments[std::get<1>(I)].end;
+ }
+
+ static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
+ return std::get<2>(I);
+ }
+
+ static bool lowestStartPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ // Condition reversed because priority queue has the *highest* element at
+ // the front, rather than the lowest.
+ return getStartPoint(I1) > getStartPoint(I2);
+ }
+
+ static bool lowestEndPoint(const IntervalInfo &I1,
+ const IntervalInfo &I2) {
+ SlotIndex E1 = getEndPoint(I1);
+ SlotIndex E2 = getEndPoint(I2);
+
+ if (E1 < E2)
+ return true;
+
+ if (E1 > E2)
+ return false;
+
+ // If two intervals end at the same point, we need a way to break the tie or
+ // the set will assume they're actually equal and refuse to insert a
+ // "duplicate". Just compare the vregs - fast and guaranteed unique.
+ return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
+ }
+
+ static bool isAtLastSegment(const IntervalInfo &I) {
+ return std::get<1>(I) == std::get<0>(I)->size() - 1;
+ }
+
+ static IntervalInfo nextSegment(const IntervalInfo &I) {
+ return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
+ }
+
+public:
+
+ void apply(PBQPRAGraph &G) override {
+ // The following is loosely based on the linear scan algorithm introduced in
+ // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
+ // isn't linear, because the size of the active set isn't bound by the
+ // number of registers, but rather the size of the largest clique in the
+ // graph. Still, we expect this to be better than N^2.
+ LiveIntervals &LIS = G.getMetadata().LIS;
+
+ // Interference matrices are incredibly regular - they're only a function of
+ // the allowed sets, so we cache them to avoid the overhead of constructing
+ // and uniquing them.
+ IMatrixCache C;
+
+ // Finding an edge is expensive in the worst case (O(max_clique(G))), so
+ // locally cache the edges we have already seen.
+ IEdgeCache EC;
+
+ // Cache known disjoint allowed-register pairs.
+ DisjointAllowedRegsCache D;
+
+ typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
+ typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)> IntervalQueue;
+ IntervalSet Active(lowestEndPoint);
+ IntervalQueue Inactive(lowestStartPoint);
+
+ // Start by building the inactive set.
+ for (auto NId : G.nodeIds()) {
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ LiveInterval &LI = LIS.getInterval(VReg);
+ assert(!LI.empty() && "PBQP graph contains node for empty interval");
+ Inactive.push(std::make_tuple(&LI, 0, NId));
+ }
+
+ while (!Inactive.empty()) {
+ // Tentatively grab the "next" interval - this choice may be overridden
+ // below.
+ IntervalInfo Cur = Inactive.top();
+
+ // Retire any active intervals that end before Cur starts.
+ IntervalSet::iterator RetireItr = Active.begin();
+ while (RetireItr != Active.end() &&
+ (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
+ // If this interval has subsequent segments, add the next one to the
+ // inactive list.
+ if (!isAtLastSegment(*RetireItr))
+ Inactive.push(nextSegment(*RetireItr));
+
+ ++RetireItr;
+ }
+ Active.erase(Active.begin(), RetireItr);
+
+ // One of the newly retired segments may actually start before the
+ // Cur segment, so re-grab the front of the inactive list.
+ Cur = Inactive.top();
+ Inactive.pop();
+
+ // At this point we know that Cur overlaps all active intervals. Add the
+ // interference edges.
+ PBQP::GraphBase::NodeId NId = getNodeId(Cur);
+ for (const auto &A : Active) {
+ PBQP::GraphBase::NodeId MId = getNodeId(A);
+
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
+ // Check that we haven't already added this edge
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
+ continue;
+
+ // This is a new edge - add it to the graph.
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
+ }
+
+ // Finally, add Cur to the Active set.
+ Active.insert(Cur);
+ }
+ }
+
+private:
+
+ // Create an Interference edge and add it to the graph, unless it is
+ // a null matrix, meaning the nodes' allowed registers do not have any
+ // interference. This case occurs frequently between integer and floating
+ // point registers for example.
+ // Returns true iff the two nodes interfere.
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
+
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+ const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
+ const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
+
+ // Try looking the edge costs up in the IMatrixCache first.
+ IKey K(&NRegs, &MRegs);
+ IMatrixCache::iterator I = C.find(K);
+ if (I != C.end()) {
+ G.addEdgeBypassingCostAllocator(NId, MId, I->second);
+ return true;
+ }
+
+ PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
+ for (unsigned I = 0; I != NRegs.size(); ++I) {
+ unsigned PRegN = NRegs[I];
+ for (unsigned J = 0; J != MRegs.size(); ++J) {
+ unsigned PRegM = MRegs[J];
+ if (TRI.regsOverlap(PRegN, PRegM)) {
+ M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
+ }
+ }
+
+ if (!NodesInterfere)
+ return false;
+
+ PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
+ C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
+ }
+};
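// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// matrix that createInterferenceEdge builds for two interfering nodes.
// Row/column 0 is the spill option and never interferes; entry (i+1, j+1)
// becomes infinity when allowed register i of one node overlaps allowed
// register j of the other.  Plain equality stands in for TRI.regsOverlap.
#include <cstddef>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  std::vector<unsigned> NRegs = {1, 2}; // allowed physregs of node N
  std::vector<unsigned> MRegs = {2, 3}; // allowed physregs of node M
  const double Inf = std::numeric_limits<double>::infinity();

  std::vector<std::vector<double>> M(
      NRegs.size() + 1, std::vector<double>(MRegs.size() + 1, 0.0));
  bool NodesInterfere = false;
  for (std::size_t I = 0; I != NRegs.size(); ++I)
    for (std::size_t J = 0; J != MRegs.size(); ++J)
      if (NRegs[I] == MRegs[J]) { // stand-in for TRI.regsOverlap(PRegN, PRegM)
        M[I + 1][J + 1] = Inf;
        NodesInterfere = true;
      }

  // Only node pairs that actually interfere get an edge in the PBQP graph.
  std::printf("nodes interfere: %s\n", NodesInterfere ? "yes" : "no");
  for (const auto &Row : M) {
    for (double C : Row)
      std::printf("%4g ", C);
    std::printf("\n");
  }
}
// ---------------------------------------------------------------------------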
+
+
+class Coalescing : public PBQPRAConstraint {
+public:
+ void apply(PBQPRAGraph &G) override {
+ MachineFunction &MF = G.getMetadata().MF;
+ MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
+ CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
+
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+ // gives the OK.
+ for (const auto &MBB : MF) {
+ for (const auto &MI : MBB) {
+
+ // Skip non-coalescable or already-coalesced copies.
+ if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
+ continue;
+
+ unsigned DstReg = CP.getDstReg();
+ unsigned SrcReg = CP.getSrcReg();
+
+ const float Scale = 1.0f / MBFI.getEntryFreq();
+ PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
+
+ if (CP.isPhys()) {
+ if (!MF.getRegInfo().isAllocatable(DstReg))
+ continue;
+
+ PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
+
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
+ G.getNodeMetadata(NId).getAllowedRegs();
+
+ unsigned PRegOpt = 0;
+ while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
+ ++PRegOpt;
+
+ if (PRegOpt < Allowed.size()) {
+ PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
+ NewCosts[PRegOpt + 1] -= CBenefit;
+ G.setNodeCosts(NId, std::move(NewCosts));
+ }
+ } else {
+ PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
+ PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
+ &G.getNodeMetadata(N1Id).getAllowedRegs();
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
+ &G.getNodeMetadata(N2Id).getAllowedRegs();
+
+ PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
+ if (EId == G.invalidEdgeId()) {
+ PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
+ Allowed2->size() + 1, 0);
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.addEdge(N1Id, N2Id, std::move(Costs));
+ } else {
+ if (G.getEdgeNode1Id(EId) == N2Id) {
+ std::swap(N1Id, N2Id);
+ std::swap(Allowed1, Allowed2);
+ }
+ PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
+ addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
+ G.updateEdgeCosts(EId, std::move(Costs));
+ }
+ }
+ }
+ }
+ }
+
+private:
+
+ void addVirtRegCoalesce(
+ PBQPRAGraph::RawMatrix &CostMat,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
+ const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
+ PBQP::PBQPNum Benefit) {
+ assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
+ assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
+ for (unsigned I = 0; I != Allowed1.size(); ++I) {
+ unsigned PReg1 = Allowed1[I];
+ for (unsigned J = 0; J != Allowed2.size(); ++J) {
+ unsigned PReg2 = Allowed2[J];
+ if (PReg1 == PReg2)
+ CostMat[I + 1][J + 1] -= Benefit;
+ }
+ }
+ }
+
+};
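// ---------------------------------------------------------------------------
// Illustrative sketch (standalone C++, not part of the LLVM sources): the
// benefit applied by addVirtRegCoalesce above.  For a vreg-to-vreg copy, every
// matrix entry where both nodes would end up in the same physical register is
// made cheaper by the copy's scaled block frequency, so the solver prefers
// assignments that coalesce the copy away.  The allowed sets and benefit are
// assumptions for the example.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> Allowed1 = {1, 2}; // allowed physregs of the dst vreg
  std::vector<unsigned> Allowed2 = {2, 3}; // allowed physregs of the src vreg
  const double CBenefit = 0.75;            // copy's block freq / entry freq

  std::vector<std::vector<double>> Costs(
      Allowed1.size() + 1, std::vector<double>(Allowed2.size() + 1, 0.0));
  for (std::size_t I = 0; I != Allowed1.size(); ++I)
    for (std::size_t J = 0; J != Allowed2.size(); ++J)
      if (Allowed1[I] == Allowed2[J])
        Costs[I + 1][J + 1] -= CBenefit; // same physreg: the copy disappears

  for (const auto &Row : Costs) {
    for (double C : Row)
      std::printf("%6.2f ", C);
    std::printf("\n");
  }
}
// ---------------------------------------------------------------------------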
+
+} // End anonymous namespace.
+
+// Out-of-line destructor/anchor for PBQPRAConstraint.
+PBQPRAConstraint::~PBQPRAConstraint() {}
+void PBQPRAConstraint::anchor() {}
+void PBQPRAConstraintList::anchor() {}
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AAResultsWrapperPass>();
+ au.addPreserved<AAResultsWrapperPass>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ if (customPassID)
+ au.addRequiredID(*customPassID);
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineBlockFrequencyInfo>();
+ au.addPreserved<MachineBlockFrequencyInfo>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<VirtRegMap>();
+ au.addPreserved<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
+ LiveIntervals &LIS) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Iterate over all live ranges.
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval &LI = LIS.getInterval(Reg);
+
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!LI.empty()) {
+ VRegsToAlloc.insert(LI.reg);
+ } else {
+ EmptyIntervalVRegs.insert(LI.reg);
+ }
+ }
+}
+
+static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
+ const MachineFunction &MF) {
+ const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSR[i] != 0; ++i)
+ if (TRI.regsOverlap(reg, CSR[i]))
+ return true;
+ return false;
+}
+
+void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
+ MachineFunction &MF = G.getMetadata().MF;
+
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo &TRI =
+ *G.getMetadata().MF.getSubtarget().getRegisterInfo();
+
+ std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+
+ while (!Worklist.empty()) {
+ unsigned VReg = Worklist.back();
+ Worklist.pop_back();
+
+ const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
+ LiveInterval &VRegLI = LIS.getInterval(VReg);
+
+ // Record any overlaps with regmask operands.
+ BitVector RegMaskOverlaps;
+ LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
+
+ // Compute an initial allowed set for the current vreg.
+ std::vector<unsigned> VRegAllowed;
+ ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
+ for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
+ unsigned PReg = RawPRegOrder[I];
+ if (MRI.isReserved(PReg))
+ continue;
+
+ // vregLI crosses a regmask operand that clobbers preg.
+ if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
+ continue;
+
+ // vregLI overlaps fixed regunit interference.
+ bool Interference = false;
+ for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
+ if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
+ Interference = true;
+ break;
+ }
+ }
+ if (Interference)
+ continue;
+
+ // preg is usable for this virtual register.
+ VRegAllowed.push_back(PReg);
+ }
+
+ // Check for vregs that have no allowed registers. These should be
+ // pre-spilled and the new vregs added to the worklist.
+ if (VRegAllowed.empty()) {
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ continue;
+ }
+
+ PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
+
+ // Tweak the cost of callee-saved registers, as using them forces spilling
+ // and restoring them. This would only happen in the prologue/epilogue though.
+ for (unsigned i = 0; i != VRegAllowed.size(); ++i)
+ if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
+ NodeCosts[1 + i] += 1.0;
+
+ PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
+ G.getNodeMetadata(NId).setVReg(VReg);
+ G.getNodeMetadata(NId).setAllowedRegs(
+ G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
+ G.getMetadata().setNodeIdForVReg(VReg, NId);
+ }
+}
+
+void RegAllocPBQP::spillVReg(unsigned VReg,
+ SmallVectorImpl<unsigned> &NewIntervals,
+ MachineFunction &MF, LiveIntervals &LIS,
+ VirtRegMap &VRM, Spiller &VRegSpiller) {
+
+ VRegsToAlloc.erase(VReg);
+ LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
+ nullptr, &DeadRemats);
+ VRegSpiller.spill(LRE);
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
+ << LRE.getParent().weight << ", New vregs: ");
+
+ // Copy any newly inserted live intervals into the list of regs to
+ // allocate.
+ for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end();
+ I != E; ++I) {
+ const LiveInterval &LI = LIS.getInterval(*I);
+ assert(!LI.empty() && "Empty spill range.");
+ DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg);
+ }
+
+ DEBUG(dbgs() << ")\n");
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
+ const PBQP::Solution &Solution,
+ VirtRegMap &VRM,
+ Spiller &VRegSpiller) {
+ MachineFunction &MF = G.getMetadata().MF;
+ LiveIntervals &LIS = G.getMetadata().LIS;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ (void)TRI;
+
+ // Set to true if we have any spills
+ bool AnotherRoundNeeded = false;
+
+ // Clear the existing allocation.
+ VRM.clearAllVirt();
+
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (auto NId : G.nodeIds()) {
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ unsigned AllocOption = Solution.getSelection(NId);
+
+ if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
+ unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
+ DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
+ << TRI.getName(PReg) << "\n");
+ assert(PReg != 0 && "Invalid preg selected.");
+ VRM.assignVirt2Phys(VReg, PReg);
+ } else {
+ // Spill VReg. If this introduces new intervals we'll need another round
+ // of allocation.
+ SmallVector<unsigned, 8> NewVRegs;
+ spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
+ AnotherRoundNeeded |= !NewVRegs.empty();
+ }
+ }
+
+ return !AnotherRoundNeeded;
+}
+
+void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
+ LiveIntervals &LIS,
+ VirtRegMap &VRM) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // First allocate registers for the empty intervals.
+ for (RegSet::const_iterator
+ I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end();
+ I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
+
+ unsigned PReg = MRI.getSimpleHint(LI.reg);
+
+ if (PReg == 0) {
+ const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
+ PReg = RC.getRawAllocationOrder(MF).front();
+ }
+
+ VRM.assignVirt2Phys(LI.reg, PReg);
+ }
+}
+
+void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
+ VRegSpiller.postOptimization();
+ /// Remove dead defs because of rematerialization.
+ for (auto DeadInst : DeadRemats) {
+ LIS.RemoveMachineInstrFromMaps(*DeadInst);
+ DeadInst->eraseFromParent();
+ }
+ DeadRemats.clear();
+}
+
+static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
+ unsigned NumInstr) {
+ // All intervals have a spill weight that is mostly proportional to the number
+ // of uses, with uses in loops having a bigger weight.
+ return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
+}
+
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<MachineBlockFrequencyInfo>();
+
+ VirtRegMap &VRM = getAnalysis<VirtRegMap>();
+
+ calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
+ MBFI, normalizePBQPSpillWeight);
+
+ std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
+
+ MF.getRegInfo().freezeReservedRegs(MF);
+
+ DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
+
+ // Allocator main loop:
+ //
+ // * Map current regalloc problem to a PBQP problem
+ // * Solve the PBQP problem
+ // * Map the solution back to a register allocation
+ // * Spill if necessary
+ //
+ // This process is continued till no more spills are generated.
+
+ // Find the vreg intervals in need of allocation.
+ findVRegIntervalsToAlloc(MF, LIS);
+
+#ifndef NDEBUG
+ const Function &F = *MF.getFunction();
+ std::string FullyQualifiedName =
+ F.getParent()->getModuleIdentifier() + "." + F.getName().str();
+#endif
+
+ // If there are non-empty intervals allocate them using pbqp.
+ if (!VRegsToAlloc.empty()) {
+
+ const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
+ std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
+ llvm::make_unique<PBQPRAConstraintList>();
+ ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
+ ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
+ if (PBQPCoalescing)
+ ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
+ ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
+
+ bool PBQPAllocComplete = false;
+ unsigned Round = 0;
+
+ while (!PBQPAllocComplete) {
+ DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+
+ PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
+ initializeGraph(G, VRM, *VRegSpiller);
+ ConstraintsRoot->apply(G);
+
+#ifndef NDEBUG
+ if (PBQPDumpGraphs) {
+ std::ostringstream RS;
+ RS << Round;
+ std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
+ ".pbqpgraph";
+ std::error_code EC;
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
+ DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
+ << GraphFileName << "\"\n");
+ G.dump(OS);
+ }
+#endif
+
+ PBQP::Solution Solution = PBQP::RegAlloc::solve(G);
+ PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
+ ++Round;
+ }
+ }
+
+ // Finalize the allocation and allocate the empty ranges.
+ finalizeAlloc(MF, LIS, VRM);
+ postOptimization(*VRegSpiller, LIS);
+ VRegsToAlloc.clear();
+ EmptyIntervalVRegs.clear();
+
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
+
+ return true;
+}
+
+/// Create Printable object for node and register info.
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
+ const PBQP::RegAlloc::PBQPRAGraph &G) {
+ return Printable([NId, &G](raw_ostream &OS) {
+ const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
+ OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')';
+ });
+}
+
+void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+ for (auto NId : nodeIds()) {
+ const Vector &Costs = getNodeCosts(NId);
+ assert(Costs.getLength() != 0 && "Empty vector in graph.");
+ OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
+ }
+ OS << '\n';
+
+ for (auto EId : edgeIds()) {
+ NodeId N1Id = getEdgeNode1Id(EId);
+ NodeId N2Id = getEdgeNode2Id(EId);
+ assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
+ const Matrix &M = getEdgeCosts(EId);
+ assert(M.getRows() != 0 && "No rows in matrix.");
+ assert(M.getCols() != 0 && "No cols in matrix.");
+ OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
+ OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
+ OS << M << '\n';
+ }
+}
+
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+
+void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
+ OS << "graph {\n";
+ for (auto NId : nodeIds()) {
+ OS << " node" << NId << " [ label=\""
+ << PrintNodeInfo(NId, *this) << "\\n"
+ << getNodeCosts(NId) << "\" ]\n";
+ }
+
+ OS << " edge [ len=" << nodeIds().size() << " ]\n";
+ for (auto EId : edgeIds()) {
+ OS << " node" << getEdgeNode1Id(EId)
+ << " -- node" << getEdgeNode2Id(EId)
+ << " [ label=\"";
+ const Matrix &EdgeCosts = getEdgeCosts(EId);
+ for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
+ OS << EdgeCosts.getRowAsVector(i) << "\\n";
+ }
+ OS << "\" ]\n";
+ }
+ OS << "}\n";
+}
+
+FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) {
+ return new RegAllocPBQP(customPassID);
+}
+
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ return createPBQPRegisterAllocator();
+}
+
+#undef DEBUG_TYPE
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
new file mode 100644
index 000000000000..50b885423375
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -0,0 +1,142 @@
+//===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass is a simple MachineFunction pass which collects register usage
+/// details by iterating through each physical register, checking
+/// MRI::isPhysRegUsed(), and then creating a RegMask based on these details.
+/// The pass then stores this RegMask in the PhysicalRegisterUsageInfo analysis.
+///
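+/// As a rough illustration of the encoding (a sketch, not part of this file's
+/// interface): a RegMask is a bit vector packed into 32-bit words, where a set
+/// bit means the register is preserved across a call and a cleared bit means
+/// it is clobbered. A hypothetical query helper would look like:
+///
+///   static bool isPreserved(const uint32_t *RegMask, unsigned Reg) {
+///     return (RegMask[Reg / 32] & (1u << (Reg % 32))) != 0;
+///   }
+///
+/// (MachineOperand::clobbersPhysReg(), used below, tests the opposite
+/// condition.)
+///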
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+STATISTIC(NumCSROpt,
+ "Number of functions optimized for callee saved registers");
+
+namespace llvm {
+void initializeRegUsageInfoCollectorPass(PassRegistry &);
+}
+
+namespace {
+class RegUsageInfoCollector : public MachineFunctionPass {
+public:
+ RegUsageInfoCollector() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeRegUsageInfoCollectorPass(Registry);
+ }
+
+ const char *getPassName() const override {
+ return "Register Usage Information Collector Pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+private:
+ void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask,
+ unsigned PReg);
+};
+} // end of anonymous namespace
+
+char RegUsageInfoCollector::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoCollector, "RegUsageInfoCollector",
+ "Register Usage Information Collector", false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoCollector, "RegUsageInfoCollector",
+ "Register Usage Information Collector", false, false)
+
+FunctionPass *llvm::createRegUsageInfoCollector() {
+ return new RegUsageInfoCollector();
+}
+
+void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI,
+ uint32_t *RegMask, unsigned PReg) {
+  // If PReg is clobbered then all of its aliases are also clobbered.
+ for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
+ RegMask[*AI / 32] &= ~(1u << *AI % 32);
+}
+
+void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetMachine &TM = MF.getTarget();
+
+ DEBUG(dbgs() << " -------------------- " << getPassName()
+ << " -------------------- \n");
+ DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+
+ std::vector<uint32_t> RegMask;
+
+ // Compute the size of the bit vector to represent all the registers.
+ // The bit vector is broken into 32-bit chunks, thus takes the ceil of
+ // the number of registers divided by 32 for the size.
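+  // For example, with a hypothetical 200 physical registers this is
+  // (200 + 31) / 32 = 7 words.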
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+ RegMask.resize(RegMaskSize, 0xFFFFFFFF);
+
+ const Function *F = MF.getFunction();
+
+ PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+
+ PRUI->setTargetMachine(&TM);
+
+ DEBUG(dbgs() << "Clobbered Registers: ");
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
+ if (MRI->isPhysRegModified(PReg, true))
+ markRegClobbered(TRI, &RegMask[0], PReg);
+
+ if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
+ const uint32_t *CallPreservedMask =
+ TRI->getCallPreservedMask(MF, F->getCallingConv());
+    // Set callee saved registers as preserved.
+ for (unsigned i = 0; i < RegMaskSize; ++i)
+ RegMask[i] = RegMask[i] | CallPreservedMask[i];
+ } else {
+ ++NumCSROpt;
+ DEBUG(dbgs() << MF.getName()
+ << " function optimized for not having CSR.\n");
+ }
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
+ if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
+ DEBUG(dbgs() << TRI->getName(PReg) << " ");
+
+ DEBUG(dbgs() << " \n----------------------------------------\n");
+
+ PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask));
+
+ return false;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
new file mode 100644
index 000000000000..759566147cc7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -0,0 +1,131 @@
+//=--- RegUsageInfoPropagate.cpp - Register Usage Information Propagation --=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass iterates through the MachineInstrs in a given MachineFunction
+/// and at each call site queries RegisterUsageInfo for the RegMask (calculated
+/// based on the actual register allocation) of the callee function. If that
+/// RegMask detail is available, the pass updates the RegMask of the call
+/// instruction. This updated RegMask will be used by the register allocator
+/// while allocating the current MachineFunction.
+///
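+/// As an illustrative sketch of the per-call-site update (it mirrors the
+/// UpdateRegMask lambda in runOnMachineFunction below; the names here are
+/// placeholders):
+///
+///   if (const auto *Mask = PRUI->getRegUsageInfo(Callee)) // may be absent
+///     setRegMask(CallMI, &(*Mask)[0]);                    // tighter RegMask
+///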
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <map>
+#include <string>
+
+namespace llvm {
+void initializeRegUsageInfoPropagationPassPass(PassRegistry &);
+}
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+#define RUIP_NAME "Register Usage Information Propagation"
+
+namespace {
+class RegUsageInfoPropagationPass : public MachineFunctionPass {
+
+public:
+ RegUsageInfoPropagationPass() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeRegUsageInfoPropagationPassPass(Registry);
+ }
+
+ const char *getPassName() const override { return RUIP_NAME; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ static char ID;
+
+private:
+ static void setRegMask(MachineInstr &MI, const uint32_t *RegMask) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask())
+ MO.setRegMask(RegMask);
+ }
+ }
+};
+} // end of anonymous namespace
+char RegUsageInfoPropagationPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoPropagationPass, "reg-usage-propagation",
+ RUIP_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoPropagationPass, "reg-usage-propagation",
+ RUIP_NAME, false, false)
+
+FunctionPass *llvm::createRegUsageInfoPropPass() {
+ return new RegUsageInfoPropagationPass();
+}
+
+void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<PhysicalRegisterUsageInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) {
+ const Module *M = MF.getFunction()->getParent();
+ PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+
+ DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName()
+ << " ++++++++++++++++++++ \n");
+ DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n");
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isCall())
+ continue;
+ DEBUG(dbgs()
+ << "Call Instruction Before Register Usage Info Propagation : \n");
+ DEBUG(dbgs() << MI << "\n");
+
+ auto UpdateRegMask = [&](const Function *F) {
+ const auto *RegMask = PRUI->getRegUsageInfo(F);
+ if (!RegMask)
+ return;
+ setRegMask(MI, &(*RegMask)[0]);
+ Changed = true;
+ };
+
+ MachineOperand &Operand = MI.getOperand(0);
+ if (Operand.isGlobal())
+ UpdateRegMask(cast<Function>(Operand.getGlobal()));
+ else if (Operand.isSymbol())
+ UpdateRegMask(M->getFunction(Operand.getSymbolName()));
+
+ DEBUG(dbgs()
+ << "Call Instruction After Register Usage Info Propagation : \n");
+ DEBUG(dbgs() << MI << "\n");
+ }
+ }
+
+ DEBUG(dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+ "++++++ \n");
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
new file mode 100644
index 000000000000..178fa18ac5a6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -0,0 +1,181 @@
+//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterClassInfo class which provides dynamic
+// information about target register classes. Callee-saved vs. caller-saved and
+// reserved registers depend on calling conventions and other dynamic
+// information, so some things cannot be determined statically.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
+RegisterClassInfo::RegisterClassInfo()
+ : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
+
+void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
+ bool Update = false;
+ MF = &mf;
+
+ // Allocate new array the first time we see a new target.
+ if (MF->getSubtarget().getRegisterInfo() != TRI) {
+ TRI = MF->getSubtarget().getRegisterInfo();
+ RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
+ unsigned NumPSets = TRI->getNumRegPressureSets();
+ PSetLimits.reset(new unsigned[NumPSets]);
+ std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0);
+ Update = true;
+ }
+
+ // Does this MF have different CSRs?
+ assert(TRI && "no register info set");
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ if (Update || CSR != CalleeSaved) {
+ // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+ // overlapping CSR.
+ CSRNum.clear();
+ CSRNum.resize(TRI->getNumRegs(), 0);
+ for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ Update = true;
+ }
+ CalleeSaved = CSR;
+
+ // Different reserved registers?
+ const BitVector &RR = MF->getRegInfo().getReservedRegs();
+ if (Reserved.size() != RR.size() || RR != Reserved) {
+ Update = true;
+ Reserved = RR;
+ }
+
+ // Invalidate cached information from previous function.
+ if (Update)
+ ++Tag;
+}
+
+/// compute - Compute the preferred allocation order for RC with reserved
+/// registers filtered out. Volatile registers come first followed by CSR
+/// aliases ordered according to the CSR order specified by the target.
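+///
+/// For instance (register names purely illustrative): if R0-R3 are volatile,
+/// R4-R5 alias callee-saved registers and R2 is reserved, the computed order
+/// is [R0, R1, R3, R4, R5].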
+void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
+ assert(RC && "no register class given");
+ RCInfo &RCI = RegClass[RC->getID()];
+
+ // Raw register count, including all reserved regs.
+ unsigned NumRegs = RC->getNumRegs();
+
+ if (!RCI.Order)
+ RCI.Order.reset(new MCPhysReg[NumRegs]);
+
+ unsigned N = 0;
+ SmallVector<MCPhysReg, 16> CSRAlias;
+ unsigned MinCost = 0xff;
+ unsigned LastCost = ~0u;
+ unsigned LastCostChange = 0;
+
+ // FIXME: Once targets reserve registers instead of removing them from the
+ // allocation order, we can simply use begin/end here.
+ ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
+ // Remove reserved registers from the allocation order.
+ if (Reserved.test(PhysReg))
+ continue;
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ MinCost = std::min(MinCost, Cost);
+
+ if (CSRNum[PhysReg])
+ // PhysReg aliases a CSR, save it for later.
+ CSRAlias.push_back(PhysReg);
+ else {
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+ }
+ RCI.NumRegs = N + CSRAlias.size();
+ assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+
+  // CSR aliases go after the volatile registers, preserving the target's order.
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
+ unsigned PhysReg = CSRAlias[i];
+ unsigned Cost = TRI->getCostPerUse(PhysReg);
+ if (Cost != LastCost)
+ LastCostChange = N;
+ RCI.Order[N++] = PhysReg;
+ LastCost = Cost;
+ }
+
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
+ // Check if RC is a proper sub-class.
+ if (const TargetRegisterClass *Super =
+ TRI->getLargestLegalSuperClass(RC, *MF))
+ if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
+ RCI.ProperSubClass = true;
+
+ RCI.MinCost = uint8_t(MinCost);
+ RCI.LastCostChange = LastCostChange;
+
+ DEBUG({
+ dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = [";
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
+ dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
+ dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n");
+ });
+
+ // RCI is now up-to-date.
+ RCI.Tag = Tag;
+}
+
+/// This is not accurate because two overlapping register sets may have some
+/// nonoverlapping reserved registers. However, computing the allocation order
+/// for all register classes would be too expensive.
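+/// For example (made-up numbers): with a raw pressure-set limit of 16, a
+/// register weight of 1, and 2 reserved registers in the chosen class, the
+/// returned limit is 16 - 1 * 2 = 14.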
+unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
+ const TargetRegisterClass *RC = nullptr;
+ unsigned NumRCUnits = 0;
+ for (TargetRegisterInfo::regclass_iterator
+ RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) {
+ const int *PSetID = TRI->getRegClassPressureSets(*RI);
+ for (; *PSetID != -1; ++PSetID) {
+ if ((unsigned)*PSetID == Idx)
+ break;
+ }
+ if (*PSetID == -1)
+ continue;
+
+ // Found a register class that counts against this pressure set.
+ // For efficiency, only compute the set order for the largest set.
+ unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit;
+ if (!RC || NUnits > NumRCUnits) {
+ RC = *RI;
+ NumRCUnits = NUnits;
+ }
+ }
+ compute(RC);
+ unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
+ return TRI->getRegPressureSetLimit(*MF, Idx) -
+ TRI->getRegClassWeight(RC).RegWeight * NReserved;
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
new file mode 100644
index 000000000000..617ece902e0e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -0,0 +1,3058 @@
+//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic RegisterCoalescer interface, which is the
+// common interface used by all clients and implementations of register
+// coalescing.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RegisterCoalescer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cmath>
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(NumInflated , "Number of register classes inflated");
+STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
+STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
+
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(false), cl::Hidden);
+
+/// Temporary flag to test critical edge unsplitting.
+static cl::opt<bool>
+EnableJoinSplits("join-splitedges",
+ cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
+
+/// Temporary flag to test global copy optimization.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalCopies("join-globalcopies",
+ cl::desc("Coalesce copies that span blocks (default=subtarget)"),
+ cl::init(cl::BOU_UNSET), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+namespace {
+ class RegisterCoalescer : public MachineFunctionPass,
+ private LiveRangeEdit::Delegate {
+ MachineFunction* MF;
+ MachineRegisterInfo* MRI;
+ const TargetMachine* TM;
+ const TargetRegisterInfo* TRI;
+ const TargetInstrInfo* TII;
+ LiveIntervals *LIS;
+ const MachineLoopInfo* Loops;
+ AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
+
+ /// A LaneMask to remember on which subregister live ranges we need to call
+ /// shrinkToUses() later.
+ LaneBitmask ShrinkMask;
+
+ /// True if the main range of the currently coalesced intervals should be
+ /// checked for smaller live intervals.
+ bool ShrinkMainRange;
+
+ /// \brief True if the coalescer should aggressively coalesce global copies
+ /// in favor of keeping local copies.
+ bool JoinGlobalCopies;
+
+ /// \brief True if the coalescer should aggressively coalesce fall-thru
+ /// blocks exclusively containing copies.
+ bool JoinSplitEdges;
+
+ /// Copy instructions yet to be coalesced.
+ SmallVector<MachineInstr*, 8> WorkList;
+ SmallVector<MachineInstr*, 8> LocalWorkList;
+
+ /// Set of instruction pointers that have been erased, and
+ /// that may be present in WorkList.
+ SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
+
+ /// Dead instructions that are about to be deleted.
+ SmallVector<MachineInstr*, 8> DeadDefs;
+
+ /// Virtual registers to be considered for register class inflation.
+ SmallVector<unsigned, 8> InflateRegs;
+
+ /// Recursively eliminate dead defs in DeadDefs.
+ void eliminateDeadDefs();
+
+ /// LiveRangeEdit callback for eliminateDeadDefs().
+ void LRE_WillEraseInstruction(MachineInstr *MI) override;
+
+ /// Coalesce the LocalWorkList.
+ void coalesceLocals();
+
+ /// Join compatible live intervals
+ void joinAllIntervals();
+
+ /// Coalesce copies in the specified MBB, putting
+ /// copies that cannot yet be coalesced into WorkList.
+ void copyCoalesceInMBB(MachineBasicBlock *MBB);
+
+ /// Tries to coalesce all copies in CurrList. Returns true if any progress
+ /// was made.
+ bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
+
+ /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
+ /// src/dst of the copy instruction CopyMI. This returns true if the copy
+ /// was successfully coalesced away. If it is not currently possible to
+ /// coalesce this interval, but it may be possible if other things get
+ /// coalesced, then it returns true by reference in 'Again'.
+ bool joinCopy(MachineInstr *TheCopy, bool &Again);
+
+ /// Attempt to join these two intervals. On failure, this
+ /// returns false. The output "SrcInt" will not have been modified, so we
+ /// can use this information below to update aliases.
+ bool joinIntervals(CoalescerPair &CP);
+
+ /// Attempt joining two virtual registers. Return true on success.
+ bool joinVirtRegs(CoalescerPair &CP);
+
+ /// Attempt joining with a reserved physreg.
+ bool joinReservedPhysReg(CoalescerPair &CP);
+
+ /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
+ /// Subranges in @p LI which only partially interfere with the desired
+    /// LaneMask are split as necessary. @p LaneMask is the set of lanes that
+    /// @p ToMerge will occupy in the coalesced register. @p LI has its subrange
+ /// lanemasks already adjusted to the coalesced register.
+ void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ LaneBitmask LaneMask, CoalescerPair &CP);
+
+ /// Join the liveranges of two subregisters. Joins @p RRange into
+ /// @p LRange, @p RRange may be invalid afterwards.
+ void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask, const CoalescerPair &CP);
+
+ /// We found a non-trivially-coalescable copy. If the source value number is
+    /// defined by a copy from the destination reg, see if we can merge these
+    /// two destination reg value numbers into a single one, eliminating a copy.
+ /// This returns true if an interval was modified.
+ bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+ /// Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+ bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+ VNInfo *AValNo, VNInfo *BValNo);
+
+ /// We found a non-trivially-coalescable copy.
+ /// If the source value number is defined by a commutable instruction and
+ /// its other operand is coalesced to the copy dest register, see if we
+ /// can transform the copy into a noop by commuting the definition.
+ /// This returns true if an interval was modified.
+ bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+    /// If the source of a copy is defined by a trivial computation, replace
+    /// the copy by rematerializing the definition.
+ bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
+ bool &IsDefCopy);
+
+ /// Return true if a copy involving a physreg should be joined.
+ bool canJoinPhys(const CoalescerPair &CP);
+
+ /// Replace all defs and uses of SrcReg to DstReg and update the subregister
+ /// number if it is not zero. If DstReg is a physical register and the
+ /// existing subregister number of the def / use being updated is not zero,
+ /// make sure to set it to the correct physical subregister.
+ void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+ /// If the given machine operand reads only undefined lanes add an undef
+ /// flag.
+ /// This can happen when undef uses were previously concealed by a copy
+ /// which we coalesced. Example:
+ /// %vreg0:sub0<def,read-undef> = ...
+ /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef
+ /// = use %vreg1:sub1 <-- hidden undef use
+ void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
+ MachineOperand &MO, unsigned SubRegIdx);
+
+ /// Handle copies of undef values.
+ /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
+ bool eliminateUndefCopy(MachineInstr *CopyMI);
+
+ /// Check whether or not we should apply the terminal rule on the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+    /// terminal rule consists in checking that at least one of the
+    /// interfering nodes, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst will not be able to be both coalesced
+ /// with Src. Since Dst2 exposes more coalescing opportunities than
+ /// Dst, we can drop \p Copy.
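+    /// For example (purely illustrative): given copies Dst = Src and
+    /// Dst2 = Src, where Dst interferes with Dst2, both cannot be coalesced
+    /// with Src; if the (Dst2, Src) affinity weighs at least as much as
+    /// (Dst, Src), dropping \p Copy loses nothing.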
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
+ /// Wrapper method for \see LiveIntervals::shrinkToUses.
+    /// This method does the proper fixing of the live-ranges when the
+    /// aforementioned method returns true.
+ void shrinkToUses(LiveInterval *LI,
+ SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+ if (LIS->shrinkToUses(LI, Dead)) {
+ /// Check whether or not \p LI is composed by multiple connected
+ /// components and if that is the case, fix that.
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS->splitSeparateComponents(*LI, SplitLIs);
+ }
+ }
+
+ public:
+ static char ID; ///< Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void releaseMemory() override;
+
+ /// This is the pass entry point.
+ bool runOnMachineFunction(MachineFunction&) override;
+
+ /// Implement the dump method.
+ void print(raw_ostream &O, const Module* = nullptr) const override;
+ };
+} // end anonymous namespace
+
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else
+ return false;
+ return true;
+}
+
+/// Return true if this block should be vacated by the coalescer to eliminate
+/// branches. The important cases to handle in the coalescer are critical edges
+/// split during phi elimination which contain only copies. Simple blocks that
+/// contain non-branches should also be vacated, but this can be handled by an
+/// earlier pass similar to early if-conversion.
+static bool isSplitEdge(const MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ for (const auto &MI : *MBB) {
+ if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
+ return false;
+ }
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ SrcReg = DstReg = 0;
+ SrcIdx = DstIdx = 0;
+ NewRC = nullptr;
+ Flipped = CrossClass = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ Partial = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ Flipped = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = TRI.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // Copies between different sub-registers are never coalescable.
+ if (Src == Dst && SrcSub != DstSub)
+ return false;
+
+ NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
+ SrcIdx, DstIdx);
+ if (!NewRC)
+ return false;
+ } else if (DstSub) {
+ // SrcReg will be merged with a sub-register of DstReg.
+ SrcIdx = DstSub;
+ NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ } else if (SrcSub) {
+ // DstReg will be merged with a sub-register of SrcReg.
+ DstIdx = SrcSub;
+ NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
+ } else {
+ // This is a straight copy without sub-registers.
+ NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
+ }
+
+ // The combined constraint may be impossible to satisfy.
+ if (!NewRC)
+ return false;
+
+ // Prefer SrcReg to be a sub-register of DstReg.
+ // FIXME: Coalescer should support subregs symmetrically.
+ if (DstIdx && !SrcIdx) {
+ std::swap(Src, Dst);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ }
+
+ CrossClass = NewRC != DstRC || NewRC != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ SrcReg = Src;
+ DstReg = Dst;
+ return true;
+}
+
+bool CoalescerPair::flip() {
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+ std::swap(SrcReg, DstReg);
+ std::swap(SrcIdx, DstIdx);
+ Flipped = !Flipped;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is SrcReg.
+ if (Dst == SrcReg) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != SrcReg) {
+ return false;
+ }
+
+ // Now check that Dst matches DstReg.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = TRI.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return DstReg == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return TRI.getSubReg(DstReg, SrcSub) == Dst;
+ } else {
+ // DstReg is virtual.
+ if (DstReg != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
+ TRI.composeSubRegIndices(DstIdx, DstSub);
+ }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::eliminateDeadDefs() {
+ SmallVector<unsigned, 8> NewRegs;
+ LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
+ nullptr, this).eliminateDeadDefs(DeadDefs);
+}
+
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
+ // MI may be in WorkList. Make sure we don't visit it.
+ ErasedInstrs.insert(MI);
+}
+
+bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPartial() && "This doesn't work for partial copies.");
+ assert(!CP.isPhys() && "This doesn't work for physreg copies.");
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+
+ // We have a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a copy from B, see if we can
+ // merge these two pieces of B into a single value number, eliminating a copy.
+ // For example:
+ //
+ // A3 = B0
+ // ...
+ // B1 = A3 <- this copy
+ //
+ // In this case, B0 can be extended to where the B1 copy lives, allowing the
+ // B1 value number to be replaced with B0 (which simplifies the B
+ // liveinterval).
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
+ if (BS == IntB.end()) return false;
+ VNInfo *BValNo = BS->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (BValNo->def != CopyIdx) return false;
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
+ LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
+ // The live segment might not exist after fun with physreg coalescing.
+ if (AS == IntA.end()) return false;
+ VNInfo *AValNo = AS->valno;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ // Don't allow any partial copies, even if isCoalescable() allows them.
+ if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
+ return false;
+
+ // Get the Segment in IntB that this value number starts with.
+ LiveInterval::iterator ValS =
+ IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
+ if (ValS == IntB.end())
+ return false;
+
+ // Make sure that the end of the live segment is inside the same block as
+ // CopyMI.
+ MachineInstr *ValSEndInst =
+ LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
+ if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValS ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live segments between them
+ // in IntB, we can merge them.
+ if (ValS+1 != BS) return false;
+
+ DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
+
+ SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
+  // We are about to delete CopyMI, so we need to remove it as the
+  // 'instruction that defines this value #'. Update the valnum with the new
+  // defining instruction #.
+ BValNo->def = FillerStart;
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValS.end, BS.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValS->valno)
+ IntB.MergeValueNumberInto(BValNo, ValS->valno);
+
+ // Do the same for the subregister segments.
+ for (LiveInterval::SubRange &S : IntB.subranges()) {
+ VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+ S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
+ VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
+ if (SubBValNo != SubValSNo)
+ S.MergeValueNumberInto(SubBValNo, SubValSNo);
+ }
+
+ DEBUG(dbgs() << " result = " << IntB << '\n');
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValSEndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // Rewrite the copy. If the copy instruction was killing the destination
+ // register before the merge, find the last use and trim the live range. That
+ // will also add the isKill marker.
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
+ if (AS->end == CopyIdx)
+ shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ // If AValNo has PHI kills, conservatively assume that IntB defs can reach
+ // the PHI values.
+ if (LIS->hasPHIKill(IntA, AValNo))
+ return true;
+
+ for (LiveRange::Segment &ASeg : IntA.segments) {
+ if (ASeg.valno != AValNo) continue;
+ LiveInterval::iterator BI =
+ std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
+ if (BI != IntB.begin())
+ --BI;
+ for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= ASeg.start && BI->end > ASeg.start)
+ return true;
+ if (BI->start > ASeg.start && BI->start < ASeg.end)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Copy segments with value number @p SrcValNo from live range @p Src to live
+/// range @p Dst and use value number @p DstValNo there.
+static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
+ const LiveRange &Src, const VNInfo *SrcValNo)
+{
+ for (const LiveRange::Segment &S : Src.segments) {
+ if (S.valno != SrcValNo)
+ continue;
+ Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
+ }
+}
+
+bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ assert(!CP.isPhys());
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // We found a non-trivially-coalescable copy with IntA being the source and
+ // IntB being the dest, thus this defines a value number in IntB. If the
+ // source value number (in IntA) is defined by a commutable instruction and
+ // its other operand is coalesced to the copy dest register, see if we can
+ // transform the copy into a noop by commuting the definition. For example,
+ //
+ // A3 = op A2 B0<kill>
+ // ...
+ // B1 = A3 <- this copy
+ // ...
+ // = op A3 <- more uses
+ //
+ // ==>
+ //
+ // B2 = op B0 A2<kill>
+ // ...
+ // B1 = B2 <- now an identity copy
+ // ...
+ // = op B2 <- more uses
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ assert(BValNo != nullptr && BValNo->def == CopyIdx);
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (AValNo->isPHIDef())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ if (!DefMI->isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+
+ // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
+  // commutable operand, which is expressed by the 'CommuteAnyOperandIndex'
+  // value passed to the method. That _other_ operand is chosen by
+ // the findCommutedOpIndices() method.
+ //
+ // That is obviously an area for improvement in case of instructions having
+ // more than 2 operands. For example, if some instruction has 3 commutable
+ // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
+ // op#2<->op#3) of commute transformation should be considered/tried here.
+ unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+  // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
+ MachineInstr *UseMI = MO.getParent();
+ unsigned OpNo = &MO - &UseMI->getOperand(0);
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
+ LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+ if (US == IntA.end() || US->valno != AValNo)
+ continue;
+ // If this use is tied to a def, we can't rewrite the register.
+ if (UseMI->isRegTiedToDefOperand(OpNo))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI =
+ TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
+ MBB->erase(DefMI);
+ }
+
+  // If ALR and BLR overlap and BLR's end extends beyond ALR's end, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ UE = MRI->use_end();
+ UI != UE; /* ++UI is below because of possible MI removal */) {
+ MachineOperand &UseMO = *UI;
+ ++UI;
+ if (UseMO.isUndef())
+ continue;
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
+ LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
+ assert(US != IntA.end() && "Use must be live");
+ if (US->valno != AValNo)
+ continue;
+ // Kill flags are no longer accurate. They are recomputed after RA.
+ UseMO.setIsKill(false);
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *TRI);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getRegSlot();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
+ for (LiveInterval::SubRange &S : IntB.subranges()) {
+ VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
+ if (!SubDVNI)
+ continue;
+ VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+ assert(SubBValNo->def == CopyIdx);
+ S.MergeValueNumberInto(SubDVNI, SubBValNo);
+ }
+
+ ErasedInstrs.insert(UseMI);
+ LIS->RemoveMachineInstrFromMaps(*UseMI);
+ UseMI->eraseFromParent();
+ }
+
+ // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
+ // is updated.
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ if (IntB.hasSubRanges()) {
+ if (!IntA.hasSubRanges()) {
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+ IntA.createSubRangeFrom(Allocator, Mask, IntA);
+ }
+ SlotIndex AIdx = CopyIdx.getRegSlot(true);
+ for (LiveInterval::SubRange &SA : IntA.subranges()) {
+ VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
+ assert(ASubValNo != nullptr);
+
+ LaneBitmask AMask = SA.LaneMask;
+ for (LiveInterval::SubRange &SB : IntB.subranges()) {
+ LaneBitmask BMask = SB.LaneMask;
+ LaneBitmask Common = BMask & AMask;
+ if (Common == 0)
+ continue;
+
+ DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
+ << " into " << PrintLaneMask(Common) << '\n');
+ LaneBitmask BRest = BMask & ~AMask;
+ LiveInterval::SubRange *CommonRange;
+ if (BRest != 0) {
+ SB.LaneMask = BRest;
+ DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
+ << '\n');
+ // Duplicate SubRange for newly merged common stuff.
+ CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
+ } else {
+          // We can reuse the SB SubRange.
+ SB.LaneMask = Common;
+ CommonRange = &SB;
+ }
+ LiveRange RangeCopy(SB, Allocator);
+
+ VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
+ assert(BSubValNo->def == CopyIdx);
+ BSubValNo->def = ASubValNo->def;
+ addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
+ AMask &= ~BMask;
+ }
+ if (AMask != 0) {
+ DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
+ LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
+ VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
+ addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
+ }
+ }
+ }
+
+ BValNo->def = AValNo->def;
+ addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ LIS->removeVRegDefAt(IntA, AValNo->def);
+
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
+
+/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
+/// defining a subregister.
+static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "This code cannot handle physreg aliasing");
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ // Return true if we define the full register or don't care about the value
+ // inside other subregisters.
+ if (Op.getSubReg() == 0 || Op.isUndef())
+ return true;
+ }
+ return false;
+}
+
+bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI,
+ bool &IsDefCopy) {
+ IsDefCopy = false;
+ unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
+ unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ LiveInterval &SrcInt = LIS->getInterval(SrcReg);
+ SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
+ VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
+ assert(ValNo && "CopyMI input register not live");
+ if (ValNo->isPHIDef() || ValNo->isUnused())
+ return false;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ if (DefMI->isCopyLike()) {
+ IsDefCopy = true;
+ return false;
+ }
+ if (!TII->isAsCheapAsAMove(*DefMI))
+ return false;
+ if (!TII->isTriviallyReMaterializable(*DefMI, AA))
+ return false;
+ if (!definesFullReg(*DefMI, SrcReg))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(AA, SawStore))
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ // Only support subregister destinations when the def is read-undef.
+ MachineOperand &DstOperand = CopyMI->getOperand(0);
+ unsigned CopyDstReg = DstOperand.getReg();
+ if (DstOperand.getSubReg() && !DstOperand.isUndef())
+ return false;
+
+ // If both SrcIdx and DstIdx are set, correct rematerialization would widen
+ // the register substantially (beyond both source and dest size). This is bad
+ // for performance since it can cascade through a function, introducing many
+ // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
+ // around after a few subreg copies).
+ if (SrcIdx && DstIdx)
+ return false;
+
+ const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
+ if (!DefMI->isImplicitDef()) {
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ unsigned NewDstReg = DstReg;
+
+ unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
+ DefMI->getOperand(0).getSubReg());
+ if (NewDstIdx)
+ NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
+
+ // Finally, make sure that the physical subregister that will be
+ // constructed later is permitted for the instruction.
+ if (!DefRC->contains(NewDstReg))
+ return false;
+ } else {
+ // Theoretically, some stack frame reference could exist. Just make sure
+ // it hasn't actually happened.
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ "Only expect to deal with virtual or physical registers");
+ }
+ }
+
+ DebugLoc DL = CopyMI->getDebugLoc();
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ std::next(MachineBasicBlock::iterator(CopyMI));
+ TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
+ MachineInstr &NewMI = *std::prev(MII);
+ NewMI.setDebugLoc(DL);
+
+ // In a situation like the following:
+ // %vreg0:subreg = instr ; DefMI, subreg = DstIdx
+ // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0
+ // instead of widening %vreg1 to the register class of %vreg0 simply do:
+ // %vreg1 = instr
+ const TargetRegisterClass *NewRC = CP.getNewRC();
+ if (DstIdx != 0) {
+ MachineOperand &DefMO = NewMI.getOperand(0);
+ if (DefMO.getSubReg() == DstIdx) {
+ assert(SrcIdx == 0 && CP.isFlipped()
+ && "Shouldn't have SrcIdx+DstIdx at this point");
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ const TargetRegisterClass *CommonRC =
+ TRI->getCommonSubClass(DefRC, DstRC);
+ if (CommonRC != nullptr) {
+ NewRC = CommonRC;
+ DstIdx = 0;
+ DefMO.setSubReg(0);
+ }
+ }
+ }
+
+ // CopyMI may have implicit operands, save them so that we can transfer them
+ // over to the newly materialized instruction after CopyMI is removed.
+ SmallVector<MachineOperand, 4> ImplicitOps;
+ ImplicitOps.reserve(CopyMI->getNumOperands() -
+ CopyMI->getDesc().getNumOperands());
+ for (unsigned I = CopyMI->getDesc().getNumOperands(),
+ E = CopyMI->getNumOperands();
+ I != E; ++I) {
+ MachineOperand &MO = CopyMI->getOperand(I);
+ if (MO.isReg()) {
+      assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ ImplicitOps.push_back(MO);
+ }
+ }
+
+ LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
+ CopyMI->eraseFromParent();
+ ErasedInstrs.insert(CopyMI);
+
+ // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<unsigned, 4> NewMIImplDefs;
+ for (unsigned i = NewMI.getDesc().getNumOperands(),
+ e = NewMI.getNumOperands();
+ i != e; ++i) {
+ MachineOperand &MO = NewMI.getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ assert(MO.isImplicit() && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ NewMIImplDefs.push_back(MO.getReg());
+ }
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ unsigned NewIdx = NewMI.getOperand(0).getSubReg();
+
+ if (DefRC != nullptr) {
+ if (NewIdx)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
+ else
+ NewRC = TRI->getCommonSubClass(NewRC, DefRC);
+ assert(NewRC && "subreg chosen for remat incompatible with instruction");
+ }
+ // Remap subranges to new lanemask and change register class.
+ LiveInterval &DstInt = LIS->getInterval(DstReg);
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
+ }
+ MRI->setRegClass(DstReg, NewRC);
+
+ // Update machine operands and add flags.
+ updateRegDefsUses(DstReg, DstReg, DstIdx);
+ NewMI.getOperand(0).setSubReg(NewIdx);
+ // Add dead subregister definitions if we are defining the whole register
+ // but only part of it is live.
+ // This could happen if the rematerialization instruction is rematerializing
+  // more than is actually used in the register.
+ // An example would be:
+ // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
+ // ; Copying only part of the register here, but the rest is undef.
+ // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit
+ // ==>
+ // ; Materialize all the constants but only using one
+ // vreg2 = LOAD_CONSTANTS 5, 8
+ //
+ // at this point for the part that wasn't defined before we could have
+ // subranges missing the definition.
+ if (NewIdx == 0 && DstInt.hasSubRanges()) {
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
+ SlotIndex DefIndex =
+ CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
+ VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
+ for (LiveInterval::SubRange &SR : DstInt.subranges()) {
+ if (!SR.liveAt(DefIndex))
+ SR.createDeadDef(DefIndex, Alloc);
+ MaxMask &= ~SR.LaneMask;
+ }
+ if (MaxMask != 0) {
+ LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
+ SR->createDeadDef(DefIndex, Alloc);
+ }
+ }
+ } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
+ // The New instruction may be defining a sub-register of what's actually
+ // been asked for. If so it must implicitly define the whole thing.
+ assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ "Only expect virtual or physical registers in remat");
+ NewMI.getOperand(0).setIsDead(true);
+ NewMI.addOperand(MachineOperand::CreateReg(
+ CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
+ // Record small dead def live-ranges for all the subregisters
+ // of the destination register.
+ // Otherwise, variables that live through may miss some
+ // interferences, thus creating invalid allocation.
+ // E.g., i386 code:
+ // vreg1 = somedef ; vreg1 GR8
+ // vreg2 = remat ; vreg2 GR32
+ // CL = COPY vreg2.sub_8bit
+ // = somedef vreg1 ; vreg1 GR8
+ // =>
+ // vreg1 = somedef ; vreg1 GR8
+ // ECX<def, dead> = remat ; CL<imp-def>
+ // = somedef vreg1 ; vreg1 GR8
+    // vreg1 will see the interferences with CL but not with CH since
+ // no live-ranges would have been created for ECX.
+ // Fix that!
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
+ Units.isValid(); ++Units)
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ }
+
+ if (NewMI.getOperand(0).getSubReg())
+ NewMI.getOperand(0).setIsUndef();
+
+ // Transfer over implicit operands to the rematerialized instruction.
+ for (MachineOperand &MO : ImplicitOps)
+ NewMI.addOperand(MO);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ unsigned Reg = NewMIImplDefs[i];
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
+ }
+
+ DEBUG(dbgs() << "Remat: " << NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ shrinkToUses(&SrcInt, &DeadDefs);
+ if (!DeadDefs.empty()) {
+ // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+ // to describe DstReg instead.
+ for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
+ MachineInstr *UseMI = UseMO.getParent();
+ if (UseMI->isDebugValue()) {
+ UseMO.setReg(DstReg);
+ DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
+ }
+ }
+ eliminateDeadDefs();
+ }
+
+ return true;
+}
+
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+ // ProcessImplicitDefs may leave some copies of <undef> values; it only
+ // removes local variables. When we have a copy like:
+ //
+ // %vreg1 = COPY %vreg2<undef>
+ //
+ // We delete the copy and remove the corresponding value number from %vreg1.
+ // Any uses of that value number are marked as <undef>.
+
+ // Note that we do not query CoalescerPair here but redo isMoveInstr as the
+ // CoalescerPair may have a new register class with adjusted subreg indices
+ // at this point.
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+
+ SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
+ const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+ // CopyMI is undef iff SrcReg is not live before the instruction.
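+ // With subregister liveness, only the lanes actually copied (SrcSubIdx) need
+ // to be dead at Idx; other lanes of SrcReg may still be live.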
+ if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
+ LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
+ for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
+ if ((SR.LaneMask & SrcMask) == 0)
+ continue;
+ if (SR.liveAt(Idx))
+ return false;
+ }
+ } else if (SrcLI.liveAt(Idx))
+ return false;
+
+ DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
+
+ // Remove any DstReg segments starting at the instruction.
+ LiveInterval &DstLI = LIS->getInterval(DstReg);
+ SlotIndex RegIndex = Idx.getRegSlot();
+ // Remove value or merge with previous one in case of a subregister def.
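+ // Idx is the base index of CopyMI, so PrevVNI is the value live into the
+ // instruction; if present, the value defined at RegIndex is merged back into
+ // it rather than removed outright.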
+ if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
+ VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
+ DstLI.MergeValueNumberInto(VNI, PrevVNI);
+
+ // The affected subregister segments can be removed.
+ LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+ for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & DstMask) == 0)
+ continue;
+
+ VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
+ assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
+ SR.removeValNo(SVNI);
+ }
+ DstLI.removeEmptySubRanges();
+ } else
+ LIS->removeVRegDefAt(DstLI, RegIndex);
+
+ // Mark uses as undef.
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
+ if (MO.isDef() /*|| MO.isUndef()*/)
+ continue;
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI);
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+ bool isLive;
+ if (UseMask != ~0u && DstLI.hasSubRanges()) {
+ isLive = false;
+ for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
+ if ((SR.LaneMask & UseMask) == 0)
+ continue;
+ if (SR.liveAt(UseIdx)) {
+ isLive = true;
+ break;
+ }
+ }
+ } else
+ isLive = DstLI.liveAt(UseIdx);
+ if (isLive)
+ continue;
+ MO.setIsUndef(true);
+ DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
+ }
+ return true;
+}
+
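+ /// Set the <undef> flag on MO if no live subrange of Int covers the lanes of
+ /// SubRegIdx at UseIdx. For a def operand the complementary lanes are checked,
+ /// since a sub-register def without <undef> also reads the remaining lanes.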
+void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
+ MachineOperand &MO, unsigned SubRegIdx) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ if (MO.isDef())
+ Mask = ~Mask;
+ bool IsUndef = true;
+ for (const LiveInterval::SubRange &S : Int.subranges()) {
+ if ((S.LaneMask & Mask) == 0)
+ continue;
+ if (S.liveAt(UseIdx)) {
+ IsUndef = false;
+ break;
+ }
+ }
+ if (IsUndef) {
+ MO.setIsUndef(true);
+ // We found out some subregister use is actually reading an undefined
+ // value. In some cases the whole vreg has become undefined at this
+ // point so we have to potentially shrink the main range if the
+ // use was ending a live segment there.
+ LiveQueryResult Q = Int.Query(UseIdx);
+ if (Q.valueOut() == nullptr)
+ ShrinkMainRange = true;
+ }
+}
+
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
+ unsigned DstReg,
+ unsigned SubIdx) {
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
+
+ if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
+ for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 || MO.isUndef())
+ continue;
+ MachineInstr &MI = *MO.getParent();
+ if (MI.isDebugValue())
+ continue;
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubReg);
+ }
+ }
+
+ SmallPtrSet<MachineInstr*, 8> Visited;
+ for (MachineRegisterInfo::reg_instr_iterator
+ I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
+ I != E; ) {
+ MachineInstr *UseMI = &*(I++);
+
+ // Each instruction can only be rewritten once because sub-register
+ // composition is not always idempotent. When SrcReg != DstReg, rewriting
+ // the UseMI operands removes them from the SrcReg use-def chain, but when
+ // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+ // operands mentioning the virtual register.
+ if (SrcReg == DstReg && !Visited.insert(UseMI).second)
+ continue;
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+
+ // If SrcReg wasn't read, it may still be the case that DstReg is live-in
+ // because SrcReg is a sub-register.
+ if (DstInt && !Reads && SubIdx)
+ Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+
+ // Adjust <undef> flags in case of sub-register joins. We don't want to
+ // turn a full def into a read-modify-write sub-register def and vice
+ // versa.
+ if (SubIdx && MO.isDef())
+ MO.setIsUndef(!Reads);
+
+ // A subreg use of a partially undef (super) register may now be a completely
+ // undef use, in which case it has to be marked that way.
+ if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ if (!DstInt->hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
+ }
+ SlotIndex MIIdx = UseMI->isDebugValue()
+ ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
+ : LIS->getInstructionIndex(*UseMI);
+ SlotIndex UseIdx = MIIdx.getRegSlot(true);
+ addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
+ }
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *TRI);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *TRI);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
+ // Always join simple intervals that are defined by a single copy from a
+ // reserved register. This doesn't increase register pressure, so it is
+ // always beneficial.
+ if (!MRI->isReserved(CP.getDstReg())) {
+ DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
+ return false;
+ }
+
+ LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
+ if (JoinVInt.containsOneValue())
+ return true;
+
+ DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n");
+ return false;
+}
+
+bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*TRI);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ if (CP.getNewRC()) {
+ auto SrcRC = MRI->getRegClass(CP.getSrcReg());
+ auto DstRC = MRI->getRegClass(CP.getDstReg());
+ unsigned SrcIdx = CP.getSrcIdx();
+ unsigned DstIdx = CP.getDstIdx();
+ if (CP.isFlipped()) {
+ std::swap(SrcIdx, DstIdx);
+ std::swap(SrcRC, DstRC);
+ }
+ if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
+ return false;
+ }
+ }
+
+ // Dead code elimination. This really should be handled by MachineDCE, but
+ // sometimes dead copies slip through, and we can't generate invalid live
+ // ranges.
+ if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
+ DEBUG(dbgs() << "\tCopy is dead.\n");
+ DeadDefs.push_back(CopyMI);
+ eliminateDeadDefs();
+ return true;
+ }
+
+ // Eliminate undefs.
+ if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
+ CopyMI->eraseFromParent();
+ return false; // Not coalescable.
+ }
+
+ // Coalesced copies are normally removed immediately, but transformations
+ // like removeCopyByCommutingDef() can inadvertently create identity copies.
+ // When that happens, just join the values and remove the copy.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
+ const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
+ LiveQueryResult LRQ = LI.Query(CopyIdx);
+ if (VNInfo *DefVNI = LRQ.valueDefined()) {
+ VNInfo *ReadVNI = LRQ.valueIn();
+ assert(ReadVNI && "No value before copy and no <undef> flag.");
+ assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+
+ // Process subregister liveranges.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveQueryResult SLRQ = S.Query(CopyIdx);
+ if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
+ VNInfo *SReadVNI = SLRQ.valueIn();
+ S.MergeValueNumberInto(SDefVNI, SReadVNI);
+ }
+ }
+ DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
+ }
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
+ CopyMI->eraseFromParent();
+ return true;
+ }
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
+ << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
+ << '\n');
+ if (!canJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ bool IsDefCopy;
+ if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
+ return true;
+ if (IsDefCopy)
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ } else {
+ // When possible, let DstReg be the larger interval.
+ if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
+ LIS->getInterval(CP.getDstReg()).size())
+ CP.flip();
+
+ DEBUG({
+ dbgs() << "\tConsidering merging to "
+ << TRI->getRegClassName(CP.getNewRC()) << " with ";
+ if (CP.getDstIdx() && CP.getSrcIdx())
+ dbgs() << PrintReg(CP.getDstReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
+ << PrintReg(CP.getSrcReg()) << " in "
+ << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
+ else
+ dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
+ << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
+ });
+ }
+
+ ShrinkMask = 0;
+ ShrinkMainRange = false;
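+ // ShrinkMask and ShrinkMainRange are filled in while joining and rewriting
+ // below, when subregister ranges or the main range have to be recomputed
+ // after the merge.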
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!joinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ bool IsDefCopy;
+ if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
+ return true;
+
+ // If we can eliminate the copy without merging the live segments, do so
+ // now.
+ if (!CP.isPartial() && !CP.isPhys()) {
+ if (adjustCopiesBackFrom(CP, CopyMI) ||
+ removeCopyByCommutingDef(CP, CopyMI)) {
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
+ CopyMI->eraseFromParent();
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Removing sub-register copies can ease the register class constraints.
+ // Make sure we attempt to inflate the register class of DstReg.
+ if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
+ InflateRegs.push_back(CP.getDstReg());
+
+ // CopyMI has been erased by joinIntervals at this point. Remove it from
+ // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
+ // to the work list. This keeps ErasedInstrs from growing needlessly.
+ ErasedInstrs.erase(CopyMI);
+
+ // Rewrite all SrcReg operands to DstReg.
+ // Also update DstReg operands to include DstIdx if it is set.
+ if (CP.getDstIdx())
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+
+ // Shrink subregister ranges if necessary.
+ if (ShrinkMask != 0) {
+ LiveInterval &LI = LIS->getInterval(CP.getDstReg());
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & ShrinkMask) == 0)
+ continue;
+ DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
+ << ")\n");
+ LIS->shrinkToUses(S, LI.reg);
+ }
+ LI.removeEmptySubRanges();
+ }
+ if (ShrinkMainRange) {
+ LiveInterval &LI = LIS->getInterval(CP.getDstReg());
+ shrinkToUses(&LI);
+ }
+
+ // SrcReg is guaranteed to be the register whose live interval is being
+ // merged.
+ LIS->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
+
+ DEBUG({
+ dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
+ << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
+ dbgs() << "\tResult = ";
+ if (CP.isPhys())
+ dbgs() << PrintReg(CP.getDstReg(), TRI);
+ else
+ dbgs() << LIS->getInterval(CP.getDstReg());
+ dbgs() << '\n';
+ });
+
+ ++numJoins;
+ return true;
+}
+
+bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
+ unsigned DstReg = CP.getDstReg();
+ assert(CP.isPhys() && "Must be a physreg copy");
+ assert(MRI->isReserved(DstReg) && "Not a reserved register");
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
+
+ assert(RHS.containsOneValue() && "Invalid join with reserved register");
+
+ // Optimization for reserved registers like ESP. We can only merge with a
+ // reserved physreg if RHS has a single value that is a copy of DstReg.
+ // The live range of the reserved register will look like a set of dead defs
+ // - we don't properly track the live range of reserved registers.
+
+ // Deny any overlapping intervals. This relies on all the reserved
+ // register live ranges looking like dead defs.
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
+
+ // Skip any value computations, we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+
+ // Delete the identity copy.
+ MachineInstr *CopyMI;
+ if (CP.isFlipped()) {
+ CopyMI = MRI->getVRegDef(RHS.reg);
+ } else {
+ if (!MRI->hasOneNonDBGUse(RHS.reg)) {
+ DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
+ return false;
+ }
+
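+ // The vreg has a single non-debug use, which is the copy itself. Removing
+ // the copy effectively moves the def of the reserved physreg up to the
+ // vreg's defining instruction, so scan the range in between for interference.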
+ MachineInstr *DestMI = MRI->getVRegDef(RHS.reg);
+ CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg);
+ const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
+ const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot();
+
+ // We checked above that there are no interfering defs of the physical
+ // register. However, for this case, where we intend to move up the def of
+ // the physical register, we also need to check for interfering uses.
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
+ SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(SI);
+ if (MI->readsRegister(DstReg, TRI)) {
+ DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
+ return false;
+ }
+
+ // We must also check for clobbers caused by regmasks.
+ for (const auto &MO : MI->operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
+ DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
+ return false;
+ }
+ }
+ }
+
+ // We're going to remove the copy which defines a physical reserved
+ // register, so remove its valno, etc.
+ DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at "
+ << CopyRegIdx << "\n");
+
+ LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+ // Create a new dead def at the new def location.
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ LiveRange &LR = LIS->getRegUnit(*UI);
+ LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
+ }
+ }
+
+ LIS->RemoveMachineInstrFromMaps(*CopyMI);
+ CopyMI->eraseFromParent();
+
+ // We don't track kills for reserved registers.
+ MRI->clearKillFlags(CP.getSrcReg());
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Interference checking and interval joining
+//===----------------------------------------------------------------------===//
+//
+// In the easiest case, the two live ranges being joined are disjoint, and
+// there is no interference to consider. It is quite common, though, to have
+// overlapping live ranges, and we need to check if the interference can be
+// resolved.
+//
+// The live range of a single SSA value forms a sub-tree of the dominator tree.
+// This means that two SSA values overlap if and only if the def of one value
+// is contained in the live range of the other value. As a special case, the
+// overlapping values can be defined at the same index.
+//
+// The interference from an overlapping def can be resolved in these cases:
+//
+// 1. Coalescable copies. The value is defined by a copy that would become an
+// identity copy after joining SrcReg and DstReg. The copy instruction will
+// be removed, and the value will be merged with the source value.
+//
+// There can be several copies back and forth, causing many values to be
+// merged into one. We compute a list of ultimate values in the joined live
+ // range as well as a mapping from the old value numbers.
+//
+// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
+// predecessors have a live out value. It doesn't cause real interference,
+// and can be merged into the value it overlaps. Like a coalescable copy, it
+// can be erased after joining.
+//
+// 3. Copy of external value. The overlapping def may be a copy of a value that
+// is already in the other register. This is like a coalescable copy, but
+// the live range of the source register must be trimmed after erasing the
+// copy instruction:
+//
+// %src = COPY %ext
+// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext.
+//
+// 4. Clobbering undefined lanes. Vector registers are sometimes built by
+// defining one lane at a time:
+//
+// %dst:ssub0<def,read-undef> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// The live range of %src overlaps the %dst value defined by FOO, but
+// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
+// which was undef anyway.
+//
+// The value mapping is more complicated in this case. The final live range
+// will have different value numbers for both FOO and BAR, but there is no
+// simple mapping from old to new values. It may even be necessary to add
+// new PHI values.
+//
+// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
+// is live, but never read. This can happen because we don't compute
+// individual live ranges per lane.
+//
+// %dst<def> = FOO
+// %src = BAR
+// %dst:ssub1<def> = COPY %src
+//
+// This kind of interference is only resolved locally. If the clobbered
+// lane value escapes the block, the join is aborted.
+
+namespace {
+/// Track information about values in a single virtual register about to be
+/// joined. Objects of this class are always created in pairs - one for each
+/// side of the CoalescerPair (or one for each lane of a side of the coalescer
+ /// pair).
+class JoinVals {
+ /// Live range we work on.
+ LiveRange &LR;
+ /// (Main) register we work on.
+ const unsigned Reg;
+
+ /// Reg (and therefore the values in this liverange) will end up as
+ /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
+ /// CP.SrcIdx.
+ const unsigned SubIdx;
+ /// The LaneMask that this liverange will occupy in the coalesced register.
+ /// May be smaller than the lanemask produced by SubIdx when merging subranges.
+ const LaneBitmask LaneMask;
+
+ /// This is true when joining sub register ranges, false when joining main
+ /// ranges.
+ const bool SubRangeJoin;
+ /// Whether the current LiveInterval tracks subregister liveness.
+ const bool TrackSubRegLiveness;
+
+ /// Values that will be present in the final live range.
+ SmallVectorImpl<VNInfo*> &NewVNInfo;
+
+ const CoalescerPair &CP;
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ const TargetRegisterInfo *TRI;
+
+ /// Value number assignments. Maps value numbers in LI to entries in
+ /// NewVNInfo. This is suitable for passing to LiveInterval::join().
+ SmallVector<int, 8> Assignments;
+
+ /// Conflict resolution for overlapping values.
+ enum ConflictResolution {
+ /// No overlap, simply keep this value.
+ CR_Keep,
+
+ /// Merge this value into OtherVNI and erase the defining instruction.
+ /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
+ /// values.
+ CR_Erase,
+
+ /// Merge this value into OtherVNI but keep the defining instruction.
+ /// This is for the special case where OtherVNI is defined by the same
+ /// instruction.
+ CR_Merge,
+
+ /// Keep this value, and have it replace OtherVNI where possible. This
+ /// complicates value mapping since OtherVNI maps to two different values
+ /// before and after this def.
+ /// Used when clobbering undefined or dead lanes.
+ CR_Replace,
+
+ /// Unresolved conflict. Visit later when all values have been mapped.
+ CR_Unresolved,
+
+ /// Unresolvable conflict. Abort the join.
+ CR_Impossible
+ };
+
+ /// Per-value info for LI. The lane bit masks are all relative to the final
+ /// joined register, so they can be compared directly between SrcReg and
+ /// DstReg.
+ struct Val {
+ ConflictResolution Resolution;
+
+ /// Lanes written by this def, 0 for unanalyzed values.
+ LaneBitmask WriteLanes;
+
+ /// Lanes with defined values in this register. Other lanes are undef and
+ /// safe to clobber.
+ LaneBitmask ValidLanes;
+
+ /// Value in LI being redefined by this def.
+ VNInfo *RedefVNI;
+
+ /// Value in the other live range that overlaps this def, if any.
+ VNInfo *OtherVNI;
+
+ /// Is this value an IMPLICIT_DEF that can be erased?
+ ///
+ /// IMPLICIT_DEF values should only exist at the end of a basic block that
+ /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
+ /// safely erased if they are overlapping a live value in the other live
+ /// interval.
+ ///
+ /// Weird control flow graphs and incomplete PHI handling in
+ /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
+ /// longer live ranges. Such IMPLICIT_DEF values should be treated like
+ /// normal values.
+ bool ErasableImplicitDef;
+
+ /// True when the live range of this value will be pruned because of an
+ /// overlapping CR_Replace value in the other live range.
+ bool Pruned;
+
+ /// True once Pruned above has been computed.
+ bool PrunedComputed;
+
+ Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
+ Pruned(false), PrunedComputed(false) {}
+
+ bool isAnalyzed() const { return WriteLanes != 0; }
+ };
+
+ /// One entry per value number in LI.
+ SmallVector<Val, 8> Vals;
+
+ /// Compute the bitmask of lanes actually written by DefMI.
+ /// Set Redef if there are any partial register definitions that depend on the
+ /// previous value of the register.
+ LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+
+ /// Find the ultimate value that VNI was copied from.
+ std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
+
+ bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;
+
+ /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
+ /// Return a conflict resolution when possible, but leave the hard cases as
+ /// CR_Unresolved.
+ /// Recursively calls computeAssignment() on this and Other, guaranteeing that
+ /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
+ /// The recursion always goes upwards in the dominator tree, making loops
+ /// impossible.
+ ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
+
+ /// Compute the value assignment for ValNo in LR.
+ /// This may be called recursively by analyzeValue(), but never for a ValNo on
+ /// the stack.
+ void computeAssignment(unsigned ValNo, JoinVals &Other);
+
+ /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
+ /// the extent of the tainted lanes in the block.
+ ///
+ /// Multiple values in Other.LR can be affected since partial redefinitions
+ /// can preserve previously tainted lanes.
+ ///
+ /// 1 %dst = VLOAD <-- Define all lanes in %dst
+ /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0
+ /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0
+ /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
+ ///
+ /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
+ /// entry to TaintedVals.
+ ///
+ /// Returns false if the tainted lanes extend beyond the basic block.
+ bool taintExtent(unsigned, LaneBitmask, JoinVals&,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&);
+
+ /// Return true if MI uses any of the given Lanes from Reg.
+ /// This does not include partial redefinitions of Reg.
+ bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
+
+ /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
+ /// be pruned:
+ ///
+ /// %dst = COPY %src
+ /// %src = COPY %dst <-- This value to be pruned.
+ /// %dst = COPY %src <-- This value is a copy of a pruned value.
+ bool isPrunedValue(unsigned ValNo, JoinVals &Other);
+
+public:
+ JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
+ SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
+ LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
+ bool TrackSubRegLiveness)
+ : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
+ SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
+ NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
+ TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums())
+ {}
+
+ /// Analyze defs in LR and compute a value mapping in NewVNInfo.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool mapValues(JoinVals &Other);
+
+ /// Try to resolve conflicts that require all values to be mapped.
+ /// Returns false if any conflicts were impossible to resolve.
+ bool resolveConflicts(JoinVals &Other);
+
+ /// Prune the live range of values in Other.LR where they would conflict with
+ /// CR_Replace values in LR. Collect end points for restoring the live range
+ /// after joining.
+ void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
+ bool changeInstrs);
+
+ /// Removes subranges starting at copies that get removed. This sometimes
+ /// happens when undefined subranges are copied around. These ranges contain
+ /// no useful information and can be removed.
+ void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
+
+ /// Erase any machine instructions that have been coalesced away.
+ /// Add erased instructions to ErasedInstrs.
+ /// Add foreign virtual registers to ShrinkRegs if their live range ended at
+ /// the erased instrs.
+ void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs);
+
+ /// Remove liverange defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
+ /// Get the value assignments suitable for passing to LiveInterval::join.
+ const int *getAssignments() const { return Assignments.data(); }
+};
+} // end anonymous namespace
+
+LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+ const {
+ LaneBitmask L = 0;
+ for (const MachineOperand &MO : DefMI->operands()) {
+ if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
+ continue;
+ L |= TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
+ if (MO.readsReg())
+ Redef = true;
+ }
+ return L;
+}
+
+std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
+ const VNInfo *VNI) const {
+ unsigned Reg = this->Reg;
+
+ while (!VNI->isPHIDef()) {
+ SlotIndex Def = VNI->def;
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No defining instruction");
+ if (!MI->isFullCopy())
+ return std::make_pair(VNI, Reg);
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return std::make_pair(VNI, Reg);
+
+ const LiveInterval &LI = LIS->getInterval(SrcReg);
+ const VNInfo *ValueIn;
+ // No subrange involved.
+ if (!SubRangeJoin || !LI.hasSubRanges()) {
+ LiveQueryResult LRQ = LI.Query(Def);
+ ValueIn = LRQ.valueIn();
+ } else {
+ // Query subranges. Pick the first matching one.
+ ValueIn = nullptr;
+ for (const LiveInterval::SubRange &S : LI.subranges()) {
+ // Transform lanemask to a mask in the joined live interval.
+ LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+ if ((SMask & LaneMask) == 0)
+ continue;
+ LiveQueryResult LRQ = S.Query(Def);
+ ValueIn = LRQ.valueIn();
+ break;
+ }
+ }
+ if (ValueIn == nullptr)
+ break;
+ VNI = ValueIn;
+ Reg = SrcReg;
+ }
+ return std::make_pair(VNI, Reg);
+}
+
+bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
+ const JoinVals &Other) const {
+ const VNInfo *Orig0;
+ unsigned Reg0;
+ std::tie(Orig0, Reg0) = followCopyChain(Value0);
+ if (Orig0 == Value1)
+ return true;
+
+ const VNInfo *Orig1;
+ unsigned Reg1;
+ std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
+
+ // The values are equal if they are defined at the same place and use the
+ // same register. Note that we cannot compare VNInfos directly as some of
+ // them might be from a copy created in mergeSubRangeInto() while the other
+ // is from the original LiveInterval.
+ return Orig0->def == Orig1->def && Reg0 == Reg1;
+}
+
+JoinVals::ConflictResolution
+JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ assert(!V.isAnalyzed() && "Value has already been analyzed!");
+ VNInfo *VNI = LR.getValNumInfo(ValNo);
+ if (VNI->isUnused()) {
+ V.WriteLanes = ~0u;
+ return CR_Keep;
+ }
+
+ // Get the instruction defining this value, compute the lanes written.
+ const MachineInstr *DefMI = nullptr;
+ if (VNI->isPHIDef()) {
+ // Conservatively assume that all lanes in a PHI are valid.
+ LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+ V.ValidLanes = V.WriteLanes = Lanes;
+ } else {
+ DefMI = Indexes->getInstructionFromIndex(VNI->def);
+ assert(DefMI != nullptr);
+ if (SubRangeJoin) {
+ // We don't care about the lanes when joining subregister ranges.
+ V.WriteLanes = V.ValidLanes = 1;
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = 0;
+ V.ErasableImplicitDef = true;
+ }
+ } else {
+ bool Redef = false;
+ V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
+
+ // If this is a read-modify-write instruction, there may be more valid
+ // lanes than the ones written by this instruction.
+ // This only covers partial redef operands. DefMI may have normal use
+ // operands reading the register. They don't contribute valid lanes.
+ //
+ // This adds ssub1 to the set of valid lanes in %src:
+ //
+ // %src:ssub1<def> = FOO
+ //
+ // This leaves only ssub1 valid, making any other lanes undef:
+ //
+ // %src:ssub1<def,read-undef> = FOO %src:ssub2
+ //
+ // The <read-undef> flag on the def operand means that old lane values are
+ // not important.
+ if (Redef) {
+ V.RedefVNI = LR.Query(VNI->def).valueIn();
+ assert((TrackSubRegLiveness || V.RedefVNI) &&
+ "Instruction is reading nonexistent value");
+ if (V.RedefVNI != nullptr) {
+ computeAssignment(V.RedefVNI->id, Other);
+ V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
+ }
+ }
+
+ // An IMPLICIT_DEF writes undef values.
+ if (DefMI->isImplicitDef()) {
+ // We normally expect IMPLICIT_DEF values to be live only until the end
+ // of their block. If the value is really live longer and gets pruned in
+ // another block, this flag is cleared again.
+ V.ErasableImplicitDef = true;
+ V.ValidLanes &= ~V.WriteLanes;
+ }
+ }
+ }
+
+ // Find the value in Other that overlaps VNI->def, if any.
+ LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
+
+ // It is possible that both values are defined by the same instruction, or
+ // the values are PHIs defined in the same block. When that happens, the two
+ // values should be merged into one, but not into any preceding value.
+ // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
+ if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
+ assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
+
+ // One value stays, the other is merged. Keep the earlier one, or the first
+ // one we see.
+ if (OtherVNI->def < VNI->def)
+ Other.computeAssignment(OtherVNI->id, *this);
+ else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
+ // This is an early-clobber def overlapping a live-in value in the other
+ // register. Not mergeable.
+ V.OtherVNI = OtherLRQ.valueIn();
+ return CR_Impossible;
+ }
+ V.OtherVNI = OtherVNI;
+ Val &OtherV = Other.Vals[OtherVNI->id];
+ // Keep this value, check for conflicts when analyzing OtherVNI.
+ if (!OtherV.isAnalyzed())
+ return CR_Keep;
+ // Both sides have been analyzed now.
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Merge;
+ if (V.ValidLanes & OtherV.ValidLanes)
+ // Overlapping lanes can't be resolved.
+ return CR_Impossible;
+ else
+ return CR_Merge;
+ }
+
+ // No simultaneous def. Is Other live at the def?
+ V.OtherVNI = OtherLRQ.valueIn();
+ if (!V.OtherVNI)
+ // No overlap, no conflict.
+ return CR_Keep;
+
+ assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
+
+ // We have overlapping values, or possibly a kill of Other.
+ // Recursively compute assignments up the dominator tree.
+ Other.computeAssignment(V.OtherVNI->id, *this);
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
+ // This shouldn't normally happen, but ProcessImplicitDefs can leave such
+ // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
+ // technically.
+ //
+ // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
+ // to erase the IMPLICIT_DEF instruction.
+ if (OtherV.ErasableImplicitDef && DefMI &&
+ DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " extends into BB#" << DefMI->getParent()->getNumber()
+ << ", keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
+ }
+
+ // Allow overlapping PHI values. Any real interference would show up in a
+ // predecessor, the PHI itself can't introduce any conflicts.
+ if (VNI->isPHIDef())
+ return CR_Replace;
+
+ // Check for simple erasable conflicts.
+ if (DefMI->isImplicitDef()) {
+ // We need the def for the subregister if there is nothing else live in the
+ // subrange at this point.
+ if (TrackSubRegLiveness
+ && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0)
+ return CR_Replace;
+ return CR_Erase;
+ }
+
+ // Include the non-conflict where DefMI is a coalescable copy that kills
+ // OtherVNI. We still want the copy erased and value numbers merged.
+ if (CP.isCoalescable(DefMI)) {
+ // Some of the lanes copied from OtherVNI may be undef, making them undef
+ // here too.
+ V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
+ return CR_Erase;
+ }
+
+ // This may not be a real conflict if DefMI simply kills Other and defines
+ // VNI.
+ if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
+ return CR_Keep;
+
+ // Handle the case where VNI and OtherVNI can be proven to be identical:
+ //
+ // %other = COPY %ext
+ // %this = COPY %ext <-- Erase this copy
+ //
+ if (DefMI->isFullCopy() && !CP.isPartial()
+ && valuesIdentical(VNI, V.OtherVNI, Other))
+ return CR_Erase;
+
+ // If the lanes written by this instruction were all undef in OtherVNI, it is
+ // still safe to join the live ranges. This can't be done with a simple value
+ // mapping, though - OtherVNI will map to multiple values:
+ //
+ // 1 %dst:ssub0 = FOO <-- OtherVNI
+ // 2 %src = BAR <-- VNI
+ // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy.
+ // 4 BAZ %dst<kill>
+ // 5 QUUX %src<kill>
+ //
+ // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
+ // handles this complex value mapping.
+ if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ return CR_Replace;
+
+ // If the other live range is killed by DefMI and the live ranges are still
+ // overlapping, it must be because we're looking at an early clobber def:
+ //
+ // %dst<def,early-clobber> = ASM %src<kill>
+ //
+ // In this case, it is illegal to merge the two live ranges since the early
+ // clobber def would clobber %src before it was read.
+ if (OtherLRQ.isKill()) {
+ // This case where the def doesn't overlap the kill is handled above.
+ assert(VNI->def.isEarlyClobber() &&
+ "Only early clobber defs can overlap a kill");
+ return CR_Impossible;
+ }
+
+ // VNI is clobbering live lanes in OtherVNI, but there is still the
+ // possibility that no instructions actually read the clobbered lanes.
+ // If we're clobbering all the lanes in OtherVNI, at least one must be read.
+ // Otherwise Other.LR wouldn't be live here.
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ return CR_Impossible;
+
+ // We need to verify that no instructions are reading the clobbered lanes. To
+ // save compile time, we'll only check that locally. Don't allow the tainted
+ // value to escape the basic block.
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
+ return CR_Impossible;
+
+ // There are still some things that could go wrong besides clobbered lanes
+ // being read, for example OtherVNI may be only partially redefined in MBB,
+ // and some clobbered lanes could escape the block. Save this analysis for
+ // resolveConflicts() when all values have been mapped. We need to know
+ // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
+ // that now - the recursive analyzeValue() calls must go upwards in the
+ // dominator tree.
+ return CR_Unresolved;
+}
+
+void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.isAnalyzed()) {
+ // Recursion should always move up the dominator tree, so ValNo is not
+ // supposed to reappear before it has been assigned.
+ assert(Assignments[ValNo] != -1 && "Bad recursion?");
+ return;
+ }
+ switch ((V.Resolution = analyzeValue(ValNo, Other))) {
+ case CR_Erase:
+ case CR_Merge:
+ // Merge this ValNo into OtherVNI.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
+ assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
+ Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
+ DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
+ << LR.getValNumInfo(ValNo)->def << " into "
+ << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
+ << V.OtherVNI->def << " --> @"
+ << NewVNInfo[Assignments[ValNo]]->def << '\n');
+ break;
+ case CR_Replace:
+ case CR_Unresolved: {
+ // The other value is going to be pruned if this join is successful.
+ assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
+ Val &OtherV = Other.Vals[V.OtherVNI->id];
+ // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
+ // its lanes.
+ if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness)
+ OtherV.ErasableImplicitDef = false;
+ OtherV.Pruned = true;
+ }
+ // Fall through.
+ default:
+ // This value number needs to go in the final joined live range.
+ Assignments[ValNo] = NewVNInfo.size();
+ NewVNInfo.push_back(LR.getValNumInfo(ValNo));
+ break;
+ }
+}
+
+bool JoinVals::mapValues(JoinVals &Other) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ computeAssignment(i, Other);
+ if (Vals[i].Resolution == CR_Impossible) {
+ DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i
+ << '@' << LR.getValNumInfo(i)->def << '\n');
+ return false;
+ }
+ }
+ return true;
+}
+
+bool JoinVals::
+taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
+ SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) {
+ VNInfo *VNI = LR.getValNumInfo(ValNo);
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
+
+ // Scan Other.LR from VNI.def to MBBEnd.
+ LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
+ assert(OtherI != Other.LR.end() && "No conflict?");
+ do {
+ // OtherI is pointing to a tainted value. Abort the join if the tainted
+ // lanes escape the block.
+ SlotIndex End = OtherI->end;
+ if (End >= MBBEnd) {
+ DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start << '\n');
+ return false;
+ }
+ DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':'
+ << OtherI->valno->id << '@' << OtherI->start
+ << " to " << End << '\n');
+ // A dead def is not a problem.
+ if (End.isDead())
+ break;
+ TaintExtent.push_back(std::make_pair(End, TaintedLanes));
+
+ // Check for another def in the MBB.
+ if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
+ break;
+
+ // Lanes written by the new def are no longer tainted.
+ const Val &OV = Other.Vals[OtherI->valno->id];
+ TaintedLanes &= ~OV.WriteLanes;
+ if (!OV.RedefVNI)
+ break;
+ } while (TaintedLanes);
+ return true;
+}
+
+bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
+ LaneBitmask Lanes) const {
+ if (MI.isDebugValue())
+ return false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (Lanes & TRI->getSubRegIndexLaneMask(
+ TRI->composeSubRegIndices(SubIdx, MO.getSubReg())))
+ return true;
+ }
+ return false;
+}
+
+bool JoinVals::resolveConflicts(JoinVals &Other) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
+ if (V.Resolution != CR_Unresolved)
+ continue;
+ DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i
+ << '@' << LR.getValNumInfo(i)->def << '\n');
+ if (SubRangeJoin)
+ return false;
+
+ ++NumLaneConflicts;
+ assert(V.OtherVNI && "Inconsistent conflict resolution.");
+ VNInfo *VNI = LR.getValNumInfo(i);
+ const Val &OtherV = Other.Vals[V.OtherVNI->id];
+
+ // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
+ // join, those lanes will be tainted with a wrong value. Get the extent of
+ // the tainted lanes.
+ LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
+ SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
+ if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
+ // Tainted lanes would extend beyond the basic block.
+ return false;
+
+ assert(!TaintExtent.empty() && "There should be at least one conflict.");
+
+ // Now look at the instructions from VNI->def to TaintExtent (inclusive).
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
+ MachineBasicBlock::iterator MI = MBB->begin();
+ if (!VNI->isPHIDef()) {
+ MI = Indexes->getInstructionFromIndex(VNI->def);
+ // No need to check the instruction defining VNI for reads.
+ ++MI;
+ }
+ assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
+ "Interference ends on VNI->def. Should have been handled earlier");
+ MachineInstr *LastMI =
+ Indexes->getInstructionFromIndex(TaintExtent.front().first);
+ assert(LastMI && "Range must end at a proper instruction");
+ unsigned TaintNum = 0;
+ for(;;) {
+ assert(MI != MBB->end() && "Bad LastMI");
+ if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
+ DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
+ return false;
+ }
+ // LastMI is the last instruction to use the current value.
+ if (&*MI == LastMI) {
+ if (++TaintNum == TaintExtent.size())
+ break;
+ LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
+ assert(LastMI && "Range must end at a proper instruction");
+ TaintedLanes = TaintExtent[TaintNum].second;
+ }
+ ++MI;
+ }
+
+ // The tainted lanes are unused.
+ V.Resolution = CR_Replace;
+ ++NumLaneResolves;
+ }
+ return true;
+}
+
+bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
+ Val &V = Vals[ValNo];
+ if (V.Pruned || V.PrunedComputed)
+ return V.Pruned;
+
+ if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
+ return V.Pruned;
+
+ // Follow copies up the dominator tree and check if any intermediate value
+ // has been pruned.
+ V.PrunedComputed = true;
+ V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
+ return V.Pruned;
+}
+
+void JoinVals::pruneValues(JoinVals &Other,
+ SmallVectorImpl<SlotIndex> &EndPoints,
+ bool changeInstrs) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ SlotIndex Def = LR.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep:
+ break;
+ case CR_Replace: {
+ // This value takes precedence over the value in Other.LR.
+ LIS->pruneValue(Other.LR, Def, &EndPoints);
+ // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+ // instructions are only inserted to provide a live-out value for PHI
+ // predecessors, so the instruction should simply go away once its value
+ // has been replaced.
+ Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+ bool EraseImpDef = OtherV.ErasableImplicitDef &&
+ OtherV.Resolution == CR_Keep;
+ if (!Def.isBlock()) {
+ if (changeInstrs) {
+ // Remove <def,read-undef> flags. This def is now a partial redef.
+ // Also remove <def,dead> flags since the joined live range will
+ // continue past this instruction.
+ for (MachineOperand &MO :
+ Indexes->getInstructionFromIndex(Def)->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
+ MO.setIsUndef(EraseImpDef);
+ MO.setIsDead(false);
+ }
+ }
+ }
+ // This value will reach instructions below, but we need to make sure
+ // the live range also reaches the instruction at Def.
+ if (!EraseImpDef)
+ EndPoints.push_back(Def);
+ }
+ DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
+ << ": " << Other.LR << '\n');
+ break;
+ }
+ case CR_Erase:
+ case CR_Merge:
+ if (isPrunedValue(i, Other)) {
+ // This value is ultimately a copy of a pruned value in LR or Other.LR.
+ // We can no longer trust the value mapping computed by
+ // computeAssignment(), the value that was originally copied could have
+ // been replaced.
+ LIS->pruneValue(LR, Def, &EndPoints);
+ DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
+ << Def << ": " << LR << '\n');
+ }
+ break;
+ case CR_Unresolved:
+ case CR_Impossible:
+ llvm_unreachable("Unresolved conflicts");
+ }
+ }
+}
+
+void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
+{
+ // Look for values being erased.
+ bool DidPrune = false;
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ if (Vals[i].Resolution != CR_Erase)
+ continue;
+
+ // Check subranges at the point where the copy will be removed.
+ SlotIndex Def = LR.getValNumInfo(i)->def;
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ LiveQueryResult Q = S.Query(Def);
+
+ // If a subrange starts at the copy then an undefined value has been
+ // copied and we must remove that subrange value as well.
+ VNInfo *ValueOut = Q.valueOutOrDead();
+ if (ValueOut != nullptr && Q.valueIn() == nullptr) {
+ DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
+ << " at " << Def << "\n");
+ LIS->pruneValue(S, Def, nullptr);
+ DidPrune = true;
+ // Mark value number as unused.
+ ValueOut->markUnused();
+ continue;
+ }
+ // If a subrange ends at the copy, then a value was copied but only
+ // partially used later. Shrink the subregister range appropriately.
+ if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
+ DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
+ << " at " << Def << "\n");
+ ShrinkMask |= S.LaneMask;
+ }
+ }
+ }
+ if (DidPrune)
+ LI.removeEmptySubRanges();
+}
+
+void JoinVals::removeImplicitDefs() {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
+ continue;
+
+ VNInfo *VNI = LR.getValNumInfo(i);
+ VNI->markUnused();
+ LR.removeValNo(VNI);
+ }
+}
+
+void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
+ SmallVectorImpl<unsigned> &ShrinkRegs) {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ // Get the def location before markUnused() below invalidates it.
+ SlotIndex Def = LR.getValNumInfo(i)->def;
+ switch (Vals[i].Resolution) {
+ case CR_Keep: {
+ // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+ // longer. The IMPLICIT_DEF instructions are only inserted by
+ // PHIElimination to guarantee that all PHI predecessors have a value.
+ if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
+ break;
+ // Remove value number i from LR.
+ VNInfo *VNI = LR.getValNumInfo(i);
+ LR.removeValNo(VNI);
+ // Note that this VNInfo is reused and still referenced in NewVNInfo, so
+ // make it appear like an unused value number.
+ VNI->markUnused();
+ DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n');
+ // FALL THROUGH.
+ }
+
+ case CR_Erase: {
+ MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+ assert(MI && "No instruction to erase");
+ if (MI->isCopy()) {
+ unsigned Reg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ ShrinkRegs.push_back(Reg);
+ }
+ ErasedInstrs.insert(MI);
+ DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ LaneBitmask LaneMask,
+ const CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
+ NewVNInfo, CP, LIS, TRI, true, true);
+ JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
+ NewVNInfo, CP, LIS, TRI, true, true);
+
+ // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
+ // We should be able to resolve all conflicts here as we could successfully do
+ // it on the main range already. There is, however, a problem when multiple
+ // ranges get mapped to the "overflow" lane mask bit, which creates unexpected
+ // interferences.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
+ }
+ if (!LHSVals.resolveConflicts(RHSVals) ||
+ !RHSVals.resolveConflicts(LHSVals)) {
+ // We already determined that it is legal to merge the intervals, so this
+ // should never fail.
+ llvm_unreachable("*** Couldn't join subrange!\n");
+ }
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints, false);
+ RHSVals.pruneValues(LHSVals, EndPoints, false);
+
+ LHSVals.removeImplicitDefs();
+ RHSVals.removeImplicitDefs();
+
+ LRange.verify();
+ RRange.verify();
+
+ // Join RRange into LHS.
+ LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
+ NewVNInfo);
+
+ DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
+ if (EndPoints.empty())
+ return;
+
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LRange << '\n');
+ LIS->extendToIndices(LRange, EndPoints);
+}
+
+void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+ const LiveRange &ToMerge,
+ LaneBitmask LaneMask,
+ CoalescerPair &CP) {
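+ // Merge ToMerge (covering LaneMask) into LI's subranges: existing subranges
+ // that only partially overlap LaneMask are split so the common lanes can be
+ // joined, and lanes not covered by any existing subrange get a new subrange
+ // copied from ToMerge.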
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ for (LiveInterval::SubRange &R : LI.subranges()) {
+ LaneBitmask RMask = R.LaneMask;
+ // LaneMask of subregisters common to subrange R and ToMerge.
+ LaneBitmask Common = RMask & LaneMask;
+ // There is nothing to do without common subregs.
+ if (Common == 0)
+ continue;
+
+ DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
+ << PrintLaneMask(Common) << '\n');
+ // LaneMask of subregisters contained in the R range but not in ToMerge,
+ // they have to split into their own subrange.
+ LaneBitmask LRest = RMask & ~LaneMask;
+ LiveInterval::SubRange *CommonRange;
+ if (LRest != 0) {
+ R.LaneMask = LRest;
+ DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
+ // Duplicate SubRange for newly merged common stuff.
+ CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
+ } else {
+ // Reuse the existing range.
+ R.LaneMask = Common;
+ CommonRange = &R;
+ }
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
+ LaneMask &= ~RMask;
+ }
+
+ if (LaneMask != 0) {
+ DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
+ LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
+ }
+}
+
+bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
+ SmallVector<VNInfo*, 16> NewVNInfo;
+ LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
+ LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
+ bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
+ JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
+ TRI, false, TrackSubRegLiveness);
+ JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
+ TRI, false, TrackSubRegLiveness);
+
+ DEBUG(dbgs() << "\t\tRHS = " << RHS
+ << "\n\t\tLHS = " << LHS
+ << '\n');
+
+ // First compute NewVNInfo and the simple value mappings.
+ // Detect impossible conflicts early.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
+ return false;
+
+ // Some conflicts can only be resolved after all values have been mapped.
+ if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
+ return false;
+
+ // All clear, the live ranges can be merged.
+ if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+
+ // Transform lanemasks from the LHS to masks in the coalesced register and
+ // create initial subranges if necessary.
+ unsigned DstIdx = CP.getDstIdx();
+ if (!LHS.hasSubRanges()) {
+ LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(DstIdx);
+ // LHS must support subregs or we wouldn't be in this codepath.
+ assert(Mask != 0);
+ LHS.createSubRangeFrom(Allocator, Mask, LHS);
+ } else if (DstIdx != 0) {
+ // Transform LHS lanemasks to new register class if necessary.
+ for (LiveInterval::SubRange &R : LHS.subranges()) {
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
+ R.LaneMask = Mask;
+ }
+ }
+ DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg())
+ << ' ' << LHS << '\n');
+
+ // Determine lanemasks of RHS in the coalesced register and merge subranges.
+ unsigned SrcIdx = CP.getSrcIdx();
+ if (!RHS.hasSubRanges()) {
+ LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
+ : TRI->getSubRegIndexLaneMask(SrcIdx);
+ mergeSubRangeInto(LHS, RHS, Mask, CP);
+ } else {
+ // Pair up subranges and merge.
+ for (LiveInterval::SubRange &R : RHS.subranges()) {
+ LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
+ mergeSubRangeInto(LHS, R, Mask, CP);
+ }
+ }
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+
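+ // Drop the subrange values that did not survive the join; lanes whose
+ // ranges may need shrinking afterwards are accumulated in ShrinkMask.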
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ }
+
+ // The merging algorithm in LiveInterval::join() can't handle conflicting
+ // value mappings, so we need to remove any live ranges that overlap a
+ // CR_Replace resolution. Collect a set of end points that can be used to
+ // restore the live range after joining.
+ SmallVector<SlotIndex, 8> EndPoints;
+ LHSVals.pruneValues(RHSVals, EndPoints, true);
+ RHSVals.pruneValues(LHSVals, EndPoints, true);
+
+ // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
+ // registers to require trimming.
+ SmallVector<unsigned, 8> ShrinkRegs;
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ while (!ShrinkRegs.empty())
+ shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+
+ // Join RHS into LHS.
+ LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
+
+ // Kill flags are going to be wrong if the live ranges were overlapping.
+ // Eventually, we should simply clear all kill flags when computing live
+ // ranges. They are reinserted after register allocation.
+ MRI->clearKillFlags(LHS.reg);
+ MRI->clearKillFlags(RHS.reg);
+
+ if (!EndPoints.empty()) {
+ // Recompute the parts of the live range we had to remove because of
+ // CR_Replace conflicts.
+ DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
+ << " points: " << LHS << '\n');
+ LIS->extendToIndices((LiveRange&)LHS, EndPoints);
+ }
+
+ return true;
+}
+
+bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
+ return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
+}
+
+namespace {
+/// Information concerning MBB coalescing priority.
+struct MBBPriorityInfo {
+ MachineBasicBlock *MBB;
+ unsigned Depth;
+ bool IsSplit;
+
+ MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
+ : MBB(mbb), Depth(depth), IsSplit(issplit) {}
+};
+}
+
+/// C-style comparator that sorts first based on the loop depth of the basic
+/// block (the unsigned), and then on the MBB number.
+///
+/// EnableGlobalCopies assumes that the primary sort key is loop depth.
+static int compareMBBPriority(const MBBPriorityInfo *LHS,
+ const MBBPriorityInfo *RHS) {
+ // Deeper loops first
+ if (LHS->Depth != RHS->Depth)
+ return LHS->Depth > RHS->Depth ? -1 : 1;
+
+ // Try to unsplit critical edges next.
+ if (LHS->IsSplit != RHS->IsSplit)
+ return LHS->IsSplit ? -1 : 1;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
+ unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
+ if (cl != cr)
+ return cl > cr ? -1 : 1;
+
+ // As a last resort, sort by block number.
+ return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
+}
+
+/// \returns true if the given copy uses or defines a local live range.
+static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
+ if (!Copy->isCopy())
+ return false;
+
+ if (Copy->getOperand(1).isUndef())
+ return false;
+
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
+ || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return false;
+
+ return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
+ || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
+}
+
+bool RegisterCoalescer::
+copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
+ bool Progress = false;
+ for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
+ if (!CurrList[i])
+ continue;
+ // Skip instruction pointers that have already been erased, for example by
+ // dead code elimination.
+ if (ErasedInstrs.erase(CurrList[i])) {
+ CurrList[i] = nullptr;
+ continue;
+ }
+ bool Again = false;
+ bool Success = joinCopy(CurrList[i], Again);
+ Progress |= Success;
+ if (Success || !Again)
+ CurrList[i] = nullptr;
+ }
+ return Progress;
+}
+
+/// Check if DstReg is a terminal node.
+/// I.e., it does not have any affinity other than \p Copy.
+static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+ const MachineRegisterInfo *MRI) {
+ assert(Copy.isCopyLike());
+ // Check if the destination of this copy has any other affinity.
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
+ if (&MI != &Copy && MI.isCopyLike())
+ return false;
+ return true;
+}
+
+bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
+ assert(Copy.isCopyLike());
+ if (!UseTerminalRule)
+ return false;
+ unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
+ isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
+ // Check if the destination of this copy has any other affinity.
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ // If SrcReg is a physical register, the copy won't be coalesced.
+ // Ignoring it may have other side effect (like missing
+ // rematerialization). So keep it.
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
+ !isTerminalReg(DstReg, Copy, MRI))
+ return false;
+
+ // DstReg is a terminal node. Check if it interferes with any other
+ // copy involving SrcReg.
+ const MachineBasicBlock *OrigBB = Copy.getParent();
+ const LiveInterval &DstLI = LIS->getInterval(DstReg);
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
+ // Technically we should check if the weight of the new copy is
+ // interesting compared to the other one and update the weight
+ // of the copies accordingly. However, this would only work if
+ // we would gather all the copies first then coalesce, whereas
+ // right now we interleave both actions.
+ // For now, just consider the copies that are in the same block.
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+ continue;
+ unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+ isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ OtherSubReg);
+ if (OtherReg == SrcReg)
+ OtherReg = OtherSrcReg;
+ // Check if OtherReg is a non-terminal.
+ if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+ isTerminalReg(OtherReg, MI, MRI))
+ continue;
+ // Check that OtherReg interferes with DstReg.
+ if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+ DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ // Collect all copy-like instructions in MBB. Don't start coalescing anything
+ // yet; it might invalidate the iterator.
+ const unsigned PrevSize = WorkList.size();
+ if (JoinGlobalCopies) {
+ SmallVector<MachineInstr*, 2> LocalTerminals;
+ SmallVector<MachineInstr*, 2> GlobalTerminals;
+ // Coalesce copies bottom-up to coalesce local defs before local uses. They
+ // are not inherently easier to resolve, but slightly preferable until we
+ // have local live range splitting. In particular this is required by
+ // cmp+jmp macro fusion.
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E; ++MII) {
+ if (!MII->isCopyLike())
+ continue;
+ bool ApplyTerminalRule = applyTerminalRule(*MII);
+ if (isLocalCopy(&(*MII), LIS)) {
+ if (ApplyTerminalRule)
+ LocalTerminals.push_back(&(*MII));
+ else
+ LocalWorkList.push_back(&(*MII));
+ } else {
+ if (ApplyTerminalRule)
+ GlobalTerminals.push_back(&(*MII));
+ else
+ WorkList.push_back(&(*MII));
+ }
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
+ WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
+ }
+ else {
+ SmallVector<MachineInstr*, 2> Terminals;
+ for (MachineInstr &MII : *MBB)
+ if (MII.isCopyLike()) {
+ if (applyTerminalRule(MII))
+ Terminals.push_back(&MII);
+ else
+ WorkList.push_back(&MII);
+ }
+ // Append the copies evicted by the terminal rule at the end of the list.
+ WorkList.append(Terminals.begin(), Terminals.end());
+ }
+ // Try coalescing the collected copies immediately, and remove the nulls.
+ // This prevents the WorkList from getting too large since most copies are
+ // joinable on the first attempt.
+ MutableArrayRef<MachineInstr*>
+ CurrList(WorkList.begin() + PrevSize, WorkList.end());
+ if (copyCoalesceWorkList(CurrList))
+ WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
+ (MachineInstr*)nullptr), WorkList.end());
+}
+
+void RegisterCoalescer::coalesceLocals() {
+ copyCoalesceWorkList(LocalWorkList);
+ for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
+ if (LocalWorkList[j])
+ WorkList.push_back(LocalWorkList[j]);
+ }
+ LocalWorkList.clear();
+}
+
+void RegisterCoalescer::joinAllIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+ assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
+
+ std::vector<MBBPriorityInfo> MBBs;
+ MBBs.reserve(MF->size());
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
+ JoinSplitEdges && isSplitEdge(MBB)));
+ }
+ array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
+
+ // Coalesce intervals in MBB priority order.
+ unsigned CurrDepth = UINT_MAX;
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+ // Try coalescing the collected local copies for deeper loops.
+ if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
+ coalesceLocals();
+ CurrDepth = MBBs[i].Depth;
+ }
+ copyCoalesceInMBB(MBBs[i].MBB);
+ }
+ coalesceLocals();
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ while (copyCoalesceWorkList(WorkList))
+ /* empty */ ;
+}
+
+void RegisterCoalescer::releaseMemory() {
+ ErasedInstrs.clear();
+ WorkList.clear();
+ DeadDefs.clear();
+ InflateRegs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ MRI = &fn.getRegInfo();
+ TM = &fn.getTarget();
+ const TargetSubtargetInfo &STI = fn.getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ Loops = &getAnalysis<MachineLoopInfo>();
+ if (EnableGlobalCopies == cl::BOU_UNSET)
+ JoinGlobalCopies = STI.enableJoinGlobalCopies();
+ else
+ JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
+
+ // The MachineScheduler does not currently require JoinSplitEdges. This will
+ // either be enabled unconditionally or replaced by a more general live range
+ // splitting optimization.
+ JoinSplitEdges = EnableJoinSplits;
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+
+ if (VerifyCoalescing)
+ MF->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining)
+ joinAllIntervals();
+
+ // After deleting a lot of copies, register classes may be less constrained.
+ // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
+ // DPR inflation.
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+ InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+ InflateRegs.end());
+ DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+ for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+ unsigned Reg = InflateRegs[i];
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ if (MRI->recomputeRegClass(Reg)) {
+ DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+ ++NumInflated;
+
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ // If the inflated register class does not support subregisters anymore,
+ // remove the subranges.
+ if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+ LI.clearSubRanges();
+ } else {
+#ifndef NDEBUG
+ LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ // If subranges are still supported, then the same subregs
+ // should still be supported.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ assert((S.LaneMask & ~MaxMask) == 0);
+ }
+#endif
+ }
+ }
+ }
+ }
+
+ DEBUG(dump());
+ if (VerifyCoalescing)
+ MF->verify(this, "After register coalescing");
+ return true;
+}
+
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ LIS->print(O, m);
+}
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
new file mode 100644
index 000000000000..04067a1427af
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -0,0 +1,116 @@
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+
+namespace llvm {
+
+ class MachineInstr;
+ class TargetRegisterInfo;
+ class TargetRegisterClass;
+ class TargetInstrInfo;
+
+ /// A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
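+ ///
+ /// Typical use (a sketch following this pass's conventions): construct a
+ /// CoalescerPair from the TargetRegisterInfo, call setRegisters() on a
+ /// candidate copy instruction, then query getDstReg(), getSrcReg() and
+ /// getNewRC() to decide how to join the corresponding live intervals.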
+ class CoalescerPair {
+ const TargetRegisterInfo &TRI;
+
+ /// The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned DstReg;
+
+ /// The virtual register that will be coalesced into DstReg.
+ unsigned SrcReg;
+
+ /// The sub-register index of the old DstReg in the new coalesced register.
+ unsigned DstIdx;
+
+ /// The sub-register index of the old SrcReg in the new coalesced register.
+ unsigned SrcIdx;
+
+ /// True when the original copy was a partial subregister copy.
+ bool Partial;
+
+ /// True when both regs are virtual and newRC is constrained.
+ bool CrossClass;
+
+ /// True when DstReg and SrcReg are reversed from the original
+ /// copy instruction.
+ bool Flipped;
+
+ /// The register class of the coalesced register, or NULL if DstReg
+ /// is a physreg. This register class may be a super-register of both
+ /// SrcReg and DstReg.
+ const TargetRegisterClass *NewRC;
+
+ public:
+ CoalescerPair(const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
+
+ /// Create a CoalescerPair representing a virtreg-to-physreg copy.
+ /// No need to call setRegisters().
+ CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+ const TargetRegisterInfo &tri)
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
+ Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
+
+ /// Set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// Swap SrcReg and DstReg. Return false if swapping is impossible
+ /// because DstReg is a physical register, or SubIdx is set.
+ bool flip();
+
+ /// Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// Return true if DstReg is a physical register.
+ bool isPhys() const { return !NewRC; }
+
+ /// Return true if the original copy instruction did not copy
+ /// the full register, but was a subreg operation.
+ bool isPartial() const { return Partial; }
+
+ /// Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return CrossClass; }
+
+ /// Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return Flipped; }
+
+ /// Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return DstReg; }
+
+ /// Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return SrcReg; }
+
+ /// Return the subregister index that DstReg will be coalesced into, or 0.
+ unsigned getDstIdx() const { return DstIdx; }
+
+ /// Return the subregister index that SrcReg will be coalesced into, or 0.
+ unsigned getSrcIdx() const { return SrcIdx; }
+
+ /// Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return NewRC; }
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
new file mode 100644
index 000000000000..a21d6c1d4d63
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -0,0 +1,1350 @@
+//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RegisterPressure class which can be used to track
+// MachineInstr level register pressure.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const MachineRegisterInfo &MRI, unsigned Reg,
+ LaneBitmask PrevMask, LaneBitmask NewMask) {
+ assert((PrevMask & ~NewMask) == 0 && "Must not remove bits");
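+ // Pressure is tracked per register, not per lane: only act when the register
+ // goes from dead (PrevMask empty) to live (NewMask non-empty).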
+ if (PrevMask != 0 || NewMask == 0)
+ return;
+
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI)
+ CurrSetPressure[*PSetI] += Weight;
+}
+
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
+static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
+ const MachineRegisterInfo &MRI, unsigned Reg,
+ LaneBitmask PrevMask, LaneBitmask NewMask) {
+ assert((NewMask & ~PrevMask) == 0 && "Must not add bits");
+ if (NewMask != 0 || PrevMask == 0)
+ return;
+
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow");
+ CurrSetPressure[*PSetI] -= Weight;
+ }
+}
+
+LLVM_DUMP_METHOD
+void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
+ const TargetRegisterInfo *TRI) {
+ bool Empty = true;
+ for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+ if (SetPressure[i] != 0) {
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+ Empty = false;
+ }
+ }
+ if (Empty)
+ dbgs() << "\n";
+}
+
+LLVM_DUMP_METHOD
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+ dbgs() << "Max Pressure: ";
+ dumpRegSetPressure(MaxSetPressure, TRI);
+ dbgs() << "Live In: ";
+ for (const RegisterMaskPair &P : LiveInRegs) {
+ dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+ if (P.LaneMask != ~0u)
+ dbgs() << ':' << PrintLaneMask(P.LaneMask);
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (const RegisterMaskPair &P : LiveOutRegs) {
+ dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
+ if (P.LaneMask != ~0u)
+ dbgs() << ':' << PrintLaneMask(P.LaneMask);
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+}
+
+LLVM_DUMP_METHOD
+void RegPressureTracker::dump() const {
+ if (!isTopClosed() || !isBottomClosed()) {
+ dbgs() << "Curr Pressure: ";
+ dumpRegSetPressure(CurrSetPressure, TRI);
+ }
+ P.dump(TRI);
+}
+
+void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+ const char *sep = "";
+ for (const PressureChange &Change : *this) {
+ if (!Change.isValid())
+ break;
+ dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet())
+ << " " << Change.getUnitInc();
+ sep = " ";
+ }
+ dbgs() << '\n';
+}
+
+void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
+ LaneBitmask PreviousMask,
+ LaneBitmask NewMask) {
+ if (PreviousMask != 0 || NewMask == 0)
+ return;
+
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+ unsigned Weight = PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ CurrSetPressure[*PSetI] += Weight;
+ P.MaxSetPressure[*PSetI] =
+ std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]);
+ }
+}
+
+void RegPressureTracker::decreaseRegPressure(unsigned RegUnit,
+ LaneBitmask PreviousMask,
+ LaneBitmask NewMask) {
+ decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask);
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is not less than or equal to the next index, open it.
+/// We happen to need the SlotIndex for the next top for pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is not greater than the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it.
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
+ if (BottomPos != PrevBottom)
+ return;
+ BottomPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+void LiveRegSet::init(const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ unsigned NumRegUnits = TRI.getNumRegs();
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ Regs.setUniverse(NumRegUnits + NumVirtRegs);
+ this->NumRegUnits = NumRegUnits;
+}
+
+void LiveRegSet::clear() {
+ Regs.clear();
+}
+
+static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return &LIS.getInterval(Reg);
+ return LIS.getCachedRegUnit(Reg);
+}
+
+void RegPressureTracker::reset() {
+ MBB = nullptr;
+ LIS = nullptr;
+
+ CurrSetPressure.clear();
+ LiveThruPressure.clear();
+ P.MaxSetPressure.clear();
+
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).reset();
+ else
+ static_cast<RegionPressure&>(P).reset();
+
+ LiveRegs.clear();
+ UntiedDefs.clear();
+}
+
+/// Setup the RegPressureTracker.
+///
+/// TODO: Add support for pressure without LiveIntervals.
+void RegPressureTracker::init(const MachineFunction *mf,
+ const RegisterClassInfo *rci,
+ const LiveIntervals *lis,
+ const MachineBasicBlock *mbb,
+ MachineBasicBlock::const_iterator pos,
+ bool TrackLaneMasks, bool TrackUntiedDefs) {
+ reset();
+
+ MF = mf;
+ TRI = MF->getSubtarget().getRegisterInfo();
+ RCI = rci;
+ MRI = &MF->getRegInfo();
+ MBB = mbb;
+ this->TrackUntiedDefs = TrackUntiedDefs;
+ this->TrackLaneMasks = TrackLaneMasks;
+
+ if (RequireIntervals) {
+ assert(lis && "IntervalPressure requires LiveIntervals");
+ LIS = lis;
+ }
+
+ CurrPos = pos;
+ CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
+
+ P.MaxSetPressure = CurrSetPressure;
+
+ LiveRegs.init(*MRI);
+ if (TrackUntiedDefs)
+ UntiedDefs.setUniverse(MRI->getNumVirtRegs());
+}
+
+/// Does this pressure result have a valid top position and live ins.
+bool RegPressureTracker::isTopClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).TopIdx.isValid();
+ return (static_cast<RegionPressure&>(P).TopPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+/// Does this pressure result have a valid bottom position and live outs.
+bool RegPressureTracker::isBottomClosed() const {
+ if (RequireIntervals)
+ return static_cast<IntervalPressure&>(P).BottomIdx.isValid();
+ return (static_cast<RegionPressure&>(P).BottomPos ==
+ MachineBasicBlock::const_iterator());
+}
+
+
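+/// Return the SlotIndex of the first non-debug instruction at or after
+/// CurrPos, or the end index of the block if there is none.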
+SlotIndex RegPressureTracker::getCurrSlot() const {
+ MachineBasicBlock::const_iterator IdxPos = CurrPos;
+ while (IdxPos != MBB->end() && IdxPos->isDebugValue())
+ ++IdxPos;
+ if (IdxPos == MBB->end())
+ return LIS->getMBBEndIdx(MBB);
+ return LIS->getInstructionIndex(*IdxPos).getRegSlot();
+}
+
+/// Set the boundary for the top of the region and summarize live ins.
+void RegPressureTracker::closeTop() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).TopPos = CurrPos;
+
+ assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
+ P.LiveInRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveInRegs);
+}
+
+/// Set the boundary for the bottom of the region and summarize live outs.
+void RegPressureTracker::closeBottom() {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
+ else
+ static_cast<RegionPressure&>(P).BottomPos = CurrPos;
+
+ assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
+ P.LiveOutRegs.reserve(LiveRegs.size());
+ LiveRegs.appendTo(P.LiveOutRegs);
+}
+
+/// Finalize the region boundaries and record live ins and live outs.
+void RegPressureTracker::closeRegion() {
+ if (!isTopClosed() && !isBottomClosed()) {
+ assert(LiveRegs.size() == 0 && "no region boundary");
+ return;
+ }
+ if (!isBottomClosed())
+ closeBottom();
+ else if (!isTopClosed())
+ closeTop();
+ // If both top and bottom are closed, do nothing.
+}
+
+/// The register tracker is unaware of global liveness so ignores normal
+/// live-thru ranges. However, two-address or coalesced chains can also lead
+/// to live ranges with no holes. Count these to inform heuristics that we
+/// can never drop below this pressure.
+void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
+ LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
+ assert(isBottomClosed() && "need bottom-up tracking to initialize.");
+ for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
+ unsigned RegUnit = Pair.RegUnit;
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit)
+ && !RPTracker.hasUntiedDef(RegUnit))
+ increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask);
+ }
+}
+
+static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
+ unsigned RegUnit) {
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end())
+ return 0;
+ return I->LaneMask;
+}
+
+static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ RegisterMaskPair Pair) {
+ unsigned RegUnit = Pair.RegUnit;
+ assert(Pair.LaneMask != 0);
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end()) {
+ RegUnits.push_back(Pair);
+ } else {
+ I->LaneMask |= Pair.LaneMask;
+ }
+}
+
+static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ unsigned RegUnit) {
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I == RegUnits.end()) {
+ RegUnits.push_back(RegisterMaskPair(RegUnit, 0));
+ } else {
+ I->LaneMask = 0;
+ }
+}
+
+static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
+ RegisterMaskPair Pair) {
+ unsigned RegUnit = Pair.RegUnit;
+ assert(Pair.LaneMask != 0);
+ auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
+ [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ if (I != RegUnits.end()) {
+ I->LaneMask &= ~Pair.LaneMask;
+ if (I->LaneMask == 0)
+ RegUnits.erase(I);
+ }
+}
+
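+/// Return the lanes of \p RegUnit for which \p Property holds at \p Pos.
+/// For physical register units without cached liveness information this
+/// conservatively returns \p SafeDefault.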
+static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit,
+ SlotIndex Pos, LaneBitmask SafeDefault,
+ bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
+ const LiveInterval &LI = LIS.getInterval(RegUnit);
+ LaneBitmask Result = 0;
+ if (TrackLaneMasks && LI.hasSubRanges()) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (Property(SR, Pos))
+ Result |= SR.LaneMask;
+ }
+ } else if (Property(LI, Pos)) {
+ Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u;
+ }
+
+ return Result;
+ } else {
+ const LiveRange *LR = LIS.getCachedRegUnit(RegUnit);
+ // Be prepared for missing live ranges: we usually do not compute live ranges
+ // for physical registers on targets with many registers (GPUs).
+ if (LR == nullptr)
+ return SafeDefault;
+ return Property(*LR, Pos) ? ~0u : 0;
+ }
+}
+
+static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, unsigned RegUnit,
+ SlotIndex Pos) {
+ return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ return LR.liveAt(Pos);
+ });
+}
+
+
+namespace {
+
+/// Collect this instruction's unique uses and defs into SmallVectors for
+/// processing defs and uses in order.
+///
+/// FIXME: always ignore tied opers
+class RegisterOperandsCollector {
+ RegisterOperands &RegOpers;
+ const TargetRegisterInfo &TRI;
+ const MachineRegisterInfo &MRI;
+ bool IgnoreDead;
+
+ RegisterOperandsCollector(RegisterOperands &RegOpers,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI, bool IgnoreDead)
+ : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {}
+
+ void collectInstr(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ collectOperand(*OperI);
+
+ // Remove redundant physreg dead defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ removeRegLanes(RegOpers.DeadDefs, P);
+ }
+
+ void collectInstrLanes(const MachineInstr &MI) const {
+ for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+ collectOperandLanes(*OperI);
+
+ // Remove redundant physreg dead defs.
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ removeRegLanes(RegOpers.DeadDefs, P);
+ }
+
+ /// Push this operand's register onto the correct vectors.
+ void collectOperand(const MachineOperand &MO) const {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ unsigned Reg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MO.isUndef() && !MO.isInternalRead())
+ pushReg(Reg, RegOpers.Uses);
+ } else {
+ assert(MO.isDef());
+ // Subregister definitions may imply a register read.
+ if (MO.readsReg())
+ pushReg(Reg, RegOpers.Uses);
+
+ if (MO.isDead()) {
+ if (!IgnoreDead)
+ pushReg(Reg, RegOpers.DeadDefs);
+ } else
+ pushReg(Reg, RegOpers.Defs);
+ }
+ }
+
+ void pushReg(unsigned Reg,
+ SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ }
+ }
+
+ void collectOperandLanes(const MachineOperand &MO) const {
+ if (!MO.isReg() || !MO.getReg())
+ return;
+ unsigned Reg = MO.getReg();
+ unsigned SubRegIdx = MO.getSubReg();
+ if (MO.isUse()) {
+ if (!MO.isUndef() && !MO.isInternalRead())
+ pushRegLanes(Reg, SubRegIdx, RegOpers.Uses);
+ } else {
+ assert(MO.isDef());
+ // Treat read-undef subreg defs as definitions of the whole register.
+ if (MO.isUndef())
+ SubRegIdx = 0;
+
+ if (MO.isDead()) {
+ if (!IgnoreDead)
+ pushRegLanes(Reg, SubRegIdx, RegOpers.DeadDefs);
+ } else
+ pushRegLanes(Reg, SubRegIdx, RegOpers.Defs);
+ }
+ }
+
+ void pushRegLanes(unsigned Reg, unsigned SubRegIdx,
+ SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ LaneBitmask LaneMask = SubRegIdx != 0
+ ? TRI.getSubRegIndexLaneMask(SubRegIdx)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ }
+ }
+
+ friend class llvm::RegisterOperands;
+};
+
+} // namespace
+
+void RegisterOperands::collect(const MachineInstr &MI,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, bool IgnoreDead) {
+ RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
+ if (TrackLaneMasks)
+ Collector.collectInstrLanes(MI);
+ else
+ Collector.collectInstr(MI);
+}
+
+void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ SlotIndex SlotIdx = LIS.getInstructionIndex(MI);
+ for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
+ unsigned Reg = RI->RegUnit;
+ const LiveRange *LR = getLiveRange(LIS, Reg);
+ if (LR != nullptr) {
+ LiveQueryResult LRQ = LR->Query(SlotIdx);
+ if (LRQ.isDeadDef()) {
+ // LiveIntervals knows this def is dead even though its MachineOperand is
+ // not flagged as such.
+ DeadDefs.push_back(*RI);
+ RI = Defs.erase(RI);
+ continue;
+ }
+ }
+ ++RI;
+ }
+}
+
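+/// Trim the collected Defs and Uses to the lanes that are actually live
+/// around \p Pos and, when \p AddFlagsMI is given, mark defs as read-undef if
+/// nothing else of the register remains live after the instruction.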
+void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ SlotIndex Pos,
+ MachineInstr *AddFlagsMI) {
+ for (auto I = Defs.begin(); I != Defs.end(); ) {
+ LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
+ Pos.getDeadSlot());
+ // If the def is all that is live after the instruction, then in case
+ // of a subregister def we need a read-undef flag.
+ unsigned RegUnit = I->RegUnit;
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0)
+ AddFlagsMI->setRegisterDefReadUndef(RegUnit);
+
+ LaneBitmask ActualDef = I->LaneMask & LiveAfter;
+ if (ActualDef == 0) {
+ I = Defs.erase(I);
+ } else {
+ I->LaneMask = ActualDef;
+ ++I;
+ }
+ }
+ for (auto I = Uses.begin(); I != Uses.end(); ) {
+ LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
+ Pos.getBaseIndex());
+ LaneBitmask LaneMask = I->LaneMask & LiveBefore;
+ if (LaneMask == 0) {
+ I = Uses.erase(I);
+ } else {
+ I->LaneMask = LaneMask;
+ ++I;
+ }
+ }
+ if (AddFlagsMI != nullptr) {
+ for (const RegisterMaskPair &P : DeadDefs) {
+ unsigned RegUnit = P.RegUnit;
+ if (!TargetRegisterInfo::isVirtualRegister(RegUnit))
+ continue;
+ LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
+ Pos.getDeadSlot());
+ if (LiveAfter == 0)
+ AddFlagsMI->setRegisterDefReadUndef(RegUnit);
+ }
+ }
+}
+
+/// Initialize an array of N PressureDiffs.
+void PressureDiffs::init(unsigned N) {
+ Size = N;
+ if (N <= Max) {
+ memset(PDiffArray, 0, N * sizeof(PressureDiff));
+ return;
+ }
+ Max = Size;
+ free(PDiffArray);
+ PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff)));
+}
+
+void PressureDiffs::addInstruction(unsigned Idx,
+ const RegisterOperands &RegOpers,
+ const MachineRegisterInfo &MRI) {
+ PressureDiff &PDiff = (*this)[Idx];
+ assert(!PDiff.begin()->isValid() && "stale PDiff");
+ for (const RegisterMaskPair &P : RegOpers.Defs)
+ PDiff.addPressureChange(P.RegUnit, true, &MRI);
+
+ for (const RegisterMaskPair &P : RegOpers.Uses)
+ PDiff.addPressureChange(P.RegUnit, false, &MRI);
+}
+
+/// Add a change in pressure to the pressure diff of a given instruction.
+void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
+ const MachineRegisterInfo *MRI) {
+ PSetIterator PSetI = MRI->getPressureSets(RegUnit);
+ int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
+ for (; PSetI.isValid(); ++PSetI) {
+ // Find an existing entry in the pressure diff for this PSet.
+ PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
+ for (; I != E && I->isValid(); ++I) {
+ if (I->getPSet() >= *PSetI)
+ break;
+ }
+ // If all pressure sets are more constrained, skip the remaining PSets.
+ if (I == E)
+ break;
+ // Insert this PressureChange.
+ if (!I->isValid() || I->getPSet() != *PSetI) {
+ PressureChange PTmp = PressureChange(*PSetI);
+ for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
+ std::swap(*J, PTmp);
+ }
+ // Update the units for this pressure set.
+ unsigned NewUnitInc = I->getUnitInc() + Weight;
+ if (NewUnitInc != 0) {
+ I->setUnitInc(NewUnitInc);
+ } else {
+ // Remove the entry by shifting the remaining valid entries down one slot.
+ PressureDiff::iterator J;
+ for (J = std::next(I); J != E && J->isValid(); ++J, ++I)
+ *I = *J;
+ if (J != E)
+ *I = *J;
+ }
+ }
+}
+
+/// Force liveness of registers.
+void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) {
+ for (const RegisterMaskPair &P : Regs) {
+ LaneBitmask PrevMask = LiveRegs.insert(P);
+ LaneBitmask NewMask = PrevMask | P.LaneMask;
+ increaseRegPressure(P.RegUnit, PrevMask, NewMask);
+ }
+}
+
+void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
+ SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
+ assert(Pair.LaneMask != 0);
+
+ unsigned RegUnit = Pair.RegUnit;
+ auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(),
+ [RegUnit](const RegisterMaskPair &Other) {
+ return Other.RegUnit == RegUnit;
+ });
+ LaneBitmask PrevMask;
+ LaneBitmask NewMask;
+ if (I == LiveInOrOut.end()) {
+ PrevMask = 0;
+ NewMask = Pair.LaneMask;
+ LiveInOrOut.push_back(Pair);
+ } else {
+ PrevMask = I->LaneMask;
+ NewMask = PrevMask | Pair.LaneMask;
+ I->LaneMask = NewMask;
+ }
+ increaseSetPressure(P.MaxSetPressure, *MRI, RegUnit, PrevMask, NewMask);
+}
+
+void RegPressureTracker::discoverLiveIn(RegisterMaskPair Pair) {
+ discoverLiveInOrOut(Pair, P.LiveInRegs);
+}
+
+void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) {
+ discoverLiveInOrOut(Pair, P.LiveOutRegs);
+}
+
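+/// Temporarily bump and then drop the pressure of all dead defs so that only
+/// MaxSetPressure records the spike they cause.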
+void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
+ for (const RegisterMaskPair &P : DeadDefs) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ increaseRegPressure(Reg, LiveMask, BumpedMask);
+ }
+ for (const RegisterMaskPair &P : DeadDefs) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask BumpedMask = LiveMask | P.LaneMask;
+ decreaseRegPressure(Reg, BumpedMask, LiveMask);
+ }
+}
+
+/// Recede across the previous instruction. If LiveUses is provided, record any
+/// RegUnits that are made live by the current instruction's uses. This includes
+/// registers that are both defined and used by the instruction. If a pressure
+/// difference pointer is provided, record the changes in pressure caused by this
+/// instruction independent of liveness.
+void RegPressureTracker::recede(const RegisterOperands &RegOpers,
+ SmallVectorImpl<RegisterMaskPair> *LiveUses) {
+ assert(!CurrPos->isDebugValue());
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
+ // TODO: consider earlyclobbers?
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ unsigned Reg = Def.RegUnit;
+
+ LaneBitmask PreviousMask = LiveRegs.erase(Def);
+ LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
+
+ LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask;
+ if (LiveOut != 0) {
+ discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
+ // Retroactively model effects on pressure of the live out lanes.
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut);
+ PreviousMask = LiveOut;
+ }
+
+ if (NewMask == 0) {
+ // Add a 0 entry to LiveUses as a marker that the complete vreg has become
+ // dead.
+ if (TrackLaneMasks && LiveUses != nullptr)
+ setRegZero(*LiveUses, Reg);
+ }
+
+ decreaseRegPressure(Reg, PreviousMask, NewMask);
+ }
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+
+ // Generate liveness for uses.
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ unsigned Reg = Use.RegUnit;
+ assert(Use.LaneMask != 0);
+ LaneBitmask PreviousMask = LiveRegs.insert(Use);
+ LaneBitmask NewMask = PreviousMask | Use.LaneMask;
+ if (NewMask == PreviousMask)
+ continue;
+
+ // Did the register just become live?
+ if (PreviousMask == 0) {
+ if (LiveUses != nullptr) {
+ if (!TrackLaneMasks) {
+ addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ } else {
+ auto I = std::find_if(LiveUses->begin(), LiveUses->end(),
+ [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
+ bool IsRedef = I != LiveUses->end();
+ if (IsRedef) {
+ // ignore re-defs here...
+ assert(I->LaneMask == 0);
+ removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ } else {
+ addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
+ }
+ }
+ }
+
+ // Discover live outs if this may be the first occurrence of this register.
+ if (RequireIntervals) {
+ LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx);
+ if (LiveOut != 0)
+ discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
+ }
+ }
+
+ increaseRegPressure(Reg, PreviousMask, NewMask);
+ }
+ if (TrackUntiedDefs) {
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ unsigned RegUnit = Def.RegUnit;
+ if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0)
+ UntiedDefs.insert(RegUnit);
+ }
+ }
+}
+
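+/// Step CurrPos back to the previous non-debug instruction, closing the
+/// bottom and opening the top of the region as needed, without updating
+/// pressure.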
+void RegPressureTracker::recedeSkipDebugValues() {
+ assert(CurrPos != MBB->begin());
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+}
+
+void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
+ recedeSkipDebugValues();
+
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ } else if (RequireIntervals) {
+ RegOpers.detectDeadDefs(MI, *LIS);
+ }
+
+ recede(RegOpers, LiveUses);
+}
+
+/// Advance across the current instruction.
+void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
+ assert(!TrackUntiedDefs && "unsupported mode");
+ assert(CurrPos != MBB->end());
+ if (!isTopClosed())
+ closeTop();
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = getCurrSlot();
+
+ // Open the bottom of the region using slot indexes.
+ if (isBottomClosed()) {
+ if (RequireIntervals)
+ static_cast<IntervalPressure&>(P).openBottom(SlotIdx);
+ else
+ static_cast<RegionPressure&>(P).openBottom(CurrPos);
+ }
+
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ unsigned Reg = Use.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
+ if (LiveIn != 0) {
+ discoverLiveIn(RegisterMaskPair(Reg, LiveIn));
+ increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn);
+ LiveRegs.insert(RegisterMaskPair(Reg, LiveIn));
+ }
+ // Kill liveness at last uses.
+ if (RequireIntervals) {
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask != 0) {
+ LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask));
+ decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask);
+ }
+ }
+ }
+
+ // Generate liveness for defs.
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ LaneBitmask PreviousMask = LiveRegs.insert(Def);
+ LaneBitmask NewMask = PreviousMask | Def.LaneMask;
+ increaseRegPressure(Def.RegUnit, PreviousMask, NewMask);
+ }
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Find the next instruction.
+ do
+ ++CurrPos;
+ while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+}
+
+void RegPressureTracker::advance() {
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = getCurrSlot();
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ }
+ advance(RegOpers);
+}
+
+/// Find the max change in excess pressure across all sets.
+static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec,
+ ArrayRef<unsigned> NewPressureVec,
+ RegPressureDelta &Delta,
+ const RegisterClassInfo *RCI,
+ ArrayRef<unsigned> LiveThruPressureVec) {
+ Delta.Excess = PressureChange();
+ for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldPressureVec[i];
+ unsigned PNew = NewPressureVec[i];
+ int PDiff = (int)PNew - (int)POld;
+ if (!PDiff) // No change in this set in the common case.
+ continue;
+ // Only consider change beyond the limit.
+ unsigned Limit = RCI->getRegPressureSetLimit(i);
+ if (!LiveThruPressureVec.empty())
+ Limit += LiveThruPressureVec[i];
+
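+ // Only the portion of the change that crosses the limit counts as excess.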
+ if (Limit > POld) {
+ if (Limit > PNew)
+ PDiff = 0; // Under the limit
+ else
+ PDiff = PNew - Limit; // Just exceeded limit.
+ } else if (Limit > PNew)
+ PDiff = Limit - POld; // Just obeyed limit.
+
+ if (PDiff) {
+ Delta.Excess = PressureChange(i);
+ Delta.Excess.setUnitInc(PDiff);
+ break;
+ }
+ }
+}
+
+/// Find the max change in max pressure that either surpasses a critical PSet
+/// limit or exceeds the current MaxPressureLimit.
+///
+/// FIXME: comparing each element of the old and new MaxPressure vectors here is
+/// silly. It's done now to demonstrate the concept but will go away with a
+/// RegPressureTracker API change to work with pressure differences.
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
+ ArrayRef<unsigned> NewMaxPressureVec,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit,
+ RegPressureDelta &Delta) {
+ Delta.CriticalMax = PressureChange();
+ Delta.CurrentMax = PressureChange();
+
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) {
+ unsigned POld = OldMaxPressureVec[i];
+ unsigned PNew = NewMaxPressureVec[i];
+ if (PNew == POld) // No change in this set in the common case.
+ continue;
+
+ if (!Delta.CriticalMax.isValid()) {
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) {
+ int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ if (PDiff > 0) {
+ Delta.CriticalMax = PressureChange(i);
+ Delta.CriticalMax.setUnitInc(PDiff);
+ }
+ }
+ }
+ // Find the first increase above MaxPressureLimit.
+ // (Ignores negative MDiff).
+ if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) {
+ Delta.CurrentMax = PressureChange(i);
+ Delta.CurrentMax.setUnitInc(PNew - POld);
+ if (CritIdx == CritEnd || Delta.CriticalMax.isValid())
+ break;
+ }
+ }
+}
+
+/// Record the upward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true);
+ assert(RegOpers.DeadDefs.size() == 0);
+ if (TrackLaneMasks)
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+ else if (RequireIntervals)
+ RegOpers.detectDeadDefs(*MI, *LIS);
+
+ // Boost max pressure for all dead defs together.
+ // Since CurrSetPressure and MaxSetPressure are updated together when pressure
+ // rises, this bump leaves its mark only in MaxSetPressure.
+ bumpDeadDefs(RegOpers.DeadDefs);
+
+ // Kill liveness at live defs.
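+ // Moving upwards past MI, the defined lanes stop being live unless MI also
+ // reads them.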
+ for (const RegisterMaskPair &P : RegOpers.Defs) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
+ LaneBitmask DefLanes = P.LaneMask;
+ LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes;
+ decreaseRegPressure(Reg, LiveLanes, LiveAfter);
+ }
+ // Generate liveness for uses.
+ for (const RegisterMaskPair &P : RegOpers.Uses) {
+ unsigned Reg = P.RegUnit;
+ LaneBitmask LiveLanes = LiveRegs.contains(Reg);
+ LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
+ increaseRegPressure(Reg, LiveLanes, LiveAfter);
+ }
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// bottom-up. Find the pressure set with the most change beyond its pressure
+/// limit based on the tracker's current pressure, and return the change in
+/// number of register units of that pressure set introduced by this
+/// instruction.
+///
+/// This assumes that the current LiveOut set is sufficient.
+///
+/// This is expensive for an on-the-fly query because it calls
+/// bumpUpwardPressure to recompute the pressure sets based on current
+/// liveness. This mainly exists to verify correctness, e.g. with
+/// -verify-misched. getUpwardPressureDelta is the fast version of this query
+/// that uses the per-SUnit cache of the PressureDiff.
+void RegPressureTracker::
+getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff,
+ RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ // FIXME: The snapshot heap space should persist. But I'm planning to
+ // summarize the pressure effect so we don't need to snapshot at all.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
+ LiveThruPressure);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+ Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+
+#ifndef NDEBUG
+ if (!PDiff)
+ return;
+
+ // Check if the alternate algorithm yields the same result.
+ RegPressureDelta Delta2;
+ getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit);
+ if (Delta != Delta2) {
+ dbgs() << "PDiff: ";
+ PDiff->dump(*TRI);
+ dbgs() << "DELTA: " << *MI;
+ if (Delta.Excess.isValid())
+ dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet())
+ << " " << Delta.Excess.getUnitInc() << "\n";
+ if (Delta.CriticalMax.isValid())
+ dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet())
+ << " " << Delta.CriticalMax.getUnitInc() << "\n";
+ if (Delta.CurrentMax.isValid())
+ dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet())
+ << " " << Delta.CurrentMax.getUnitInc() << "\n";
+ if (Delta2.Excess.isValid())
+ dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet())
+ << " " << Delta2.Excess.getUnitInc() << "\n";
+ if (Delta2.CriticalMax.isValid())
+ dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet())
+ << " " << Delta2.CriticalMax.getUnitInc() << "\n";
+ if (Delta2.CurrentMax.isValid())
+ dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet())
+ << " " << Delta2.CurrentMax.getUnitInc() << "\n";
+ llvm_unreachable("RegP Delta Mismatch");
+ }
+#endif
+}
+
+/// This is the fast version of querying register pressure that does not
+/// directly depend on current liveness.
+///
+/// @param Delta captures information needed for heuristics.
+///
+/// @param CriticalPSets Are the pressure sets that are known to exceed some
+/// limit within the region, not necessarily at the current position.
+///
+/// @param MaxPressureLimit Is the max pressure within the region, not
+/// necessarily at the current position.
+void RegPressureTracker::
+getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
+ RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) const {
+ unsigned CritIdx = 0, CritEnd = CriticalPSets.size();
+ for (PressureDiff::const_iterator
+ PDiffI = PDiff.begin(), PDiffE = PDiff.end();
+ PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) {
+
+ unsigned PSetID = PDiffI->getPSet();
+ unsigned Limit = RCI->getRegPressureSetLimit(PSetID);
+ if (!LiveThruPressure.empty())
+ Limit += LiveThruPressure[PSetID];
+
+ unsigned POld = CurrSetPressure[PSetID];
+ unsigned MOld = P.MaxSetPressure[PSetID];
+ unsigned MNew = MOld;
+ // Ignore DeadDefs here because they aren't captured by PressureChange.
+ unsigned PNew = POld + PDiffI->getUnitInc();
+ assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld)
+ && "PSet overflow/underflow");
+ if (PNew > MOld)
+ MNew = PNew;
+ // Check if current pressure has exceeded the limit.
+ if (!Delta.Excess.isValid()) {
+ unsigned ExcessInc = 0;
+ if (PNew > Limit)
+ ExcessInc = POld > Limit ? PNew - POld : PNew - Limit;
+ else if (POld > Limit)
+ ExcessInc = Limit - POld;
+ if (ExcessInc) {
+ Delta.Excess = PressureChange(PSetID);
+ Delta.Excess.setUnitInc(ExcessInc);
+ }
+ }
+ // Check if max pressure has exceeded a critical pressure set max.
+ if (MNew == MOld)
+ continue;
+ if (!Delta.CriticalMax.isValid()) {
+ while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID)
+ ++CritIdx;
+
+ if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
+ int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
+ if (CritInc > 0 && CritInc <= INT16_MAX) {
+ Delta.CriticalMax = PressureChange(PSetID);
+ Delta.CriticalMax.setUnitInc(CritInc);
+ }
+ }
+ }
+ // Check if max pressure has exceeded the current max.
+ if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) {
+ Delta.CurrentMax = PressureChange(PSetID);
+ Delta.CurrentMax.setUnitInc(MNew - MOld);
+ }
+ }
+}
+
+/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
+/// The query starts with a lane bitmask which gets lanes/bits removed for every
+/// use we find.
+static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
+ SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals *LIS) {
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
+ const MachineInstr *MI = MO.getParent();
+ SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
+ if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) {
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ LastUseMask &= ~UseMask;
+ if (LastUseMask == 0)
+ return 0;
+ }
+ }
+ return LastUseMask;
+}
+
+LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ return LR.liveAt(Pos);
+ });
+}
+
+LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
+ Pos.getBaseIndex(), 0,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->end == Pos.getRegSlot();
+ });
+}
+
+LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
+ SlotIndex Pos) const {
+ assert(RequireIntervals);
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u,
+ [](const LiveRange &LR, SlotIndex Pos) {
+ const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
+ return S != nullptr && S->start < Pos.getRegSlot(true) &&
+ S->end != Pos.getDeadSlot();
+ });
+}
+
+/// Record the downward impact of a single instruction on current register
+/// pressure. Unlike the advance/recede pressure tracking interface, this does
+/// not discover live in/outs.
+///
+/// This is intended for speculative queries. It leaves pressure inconsistent
+/// with the current position, so must be restored by the caller.
+void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+ assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+
+ // Account for register pressure similar to RegPressureTracker::recede().
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks)
+ RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx);
+
+ if (RequireIntervals) {
+ for (const RegisterMaskPair &Use : RegOpers.Uses) {
+ unsigned Reg = Use.RegUnit;
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask == 0)
+ continue;
+ // The LastUseMask is queried from the liveness information of an
+ // instruction that may be further down the schedule. Some lanes may
+ // actually not be last uses for the current position.
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx = getCurrSlot();
+ LastUseMask
+ = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS);
+ if (LastUseMask == 0)
+ continue;
+
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask NewMask = LiveMask & ~LastUseMask;
+ decreaseRegPressure(Reg, LiveMask, NewMask);
+ }
+ }
+
+ // Generate liveness for defs.
+ for (const RegisterMaskPair &Def : RegOpers.Defs) {
+ unsigned Reg = Def.RegUnit;
+ LaneBitmask LiveMask = LiveRegs.contains(Reg);
+ LaneBitmask NewMask = LiveMask | Def.LaneMask;
+ increaseRegPressure(Reg, LiveMask, NewMask);
+ }
+
+ // Boost pressure for all dead defs together.
+ bumpDeadDefs(RegOpers.DeadDefs);
+}
+
+/// Consider the pressure increase caused by traversing this instruction
+/// top-down. Find the register class with the most change in its pressure limit
+/// based on the tracker's current pressure, and return the number of excess
+/// register units of that pressure set introduced by this instruction.
+///
+/// This assumes that the current LiveIn set is sufficient.
+///
+/// This is expensive for an on-the-fly query because it calls
+/// bumpDownwardPressure to recompute the pressure sets based on current
+/// liveness. We don't yet have a fast version of downward pressure tracking
+/// analogous to getUpwardPressureDelta.
+void RegPressureTracker::
+getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta,
+ ArrayRef<PressureChange> CriticalPSets,
+ ArrayRef<unsigned> MaxPressureLimit) {
+ // Snapshot Pressure.
+ std::vector<unsigned> SavedPressure = CurrSetPressure;
+ std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI,
+ LiveThruPressure);
+ computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets,
+ MaxPressureLimit, Delta);
+ assert(Delta.CriticalMax.getUnitInc() >= 0 &&
+ Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure");
+
+ // Restore the tracker's state.
+ P.MaxSetPressure.swap(SavedMaxPressure);
+ CurrSetPressure.swap(SavedPressure);
+}
+
+/// Get the pressure of each PSet after traversing this instruction bottom-up.
+void RegPressureTracker::
+getUpwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpUpwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
+
+/// Get the pressure of each PSet after traversing this instruction top-down.
+void RegPressureTracker::
+getDownwardPressure(const MachineInstr *MI,
+ std::vector<unsigned> &PressureResult,
+ std::vector<unsigned> &MaxPressureResult) {
+ // Snapshot pressure.
+ PressureResult = CurrSetPressure;
+ MaxPressureResult = P.MaxSetPressure;
+
+ bumpDownwardPressure(MI);
+
+ // Current pressure becomes the result. Restore current pressure.
+ P.MaxSetPressure.swap(MaxPressureResult);
+ CurrSetPressure.swap(PressureResult);
+}
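+
+// A hedged sketch of the snapshot/bump/swap pattern used by both helpers
+// above, assuming a top-down tracker `TopRPT` (hypothetical name): the caller
+// can probe "what if this MI is issued next?" without perturbing the tracker.
+//
+//   std::vector<unsigned> Pressure, MaxPressure;
+//   TopRPT.getDownwardPressure(MI, Pressure, MaxPressure);
+//   // Pressure[PSet] now holds the set pressure after issuing MI top-down,
+//   // while TopRPT itself remains at its original position.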
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
new file mode 100644
index 000000000000..6b80179190db
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -0,0 +1,458 @@
+//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the machine register scavenger. It can provide
+/// information, such as unused registers, at any point in a machine basic
+/// block. It also provides a mechanism to make registers available by evicting
+/// them to spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "reg-scavenging"
+
+void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
+ for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
+ LaneBitmask UnitMask = (*RUI).second;
+ if (UnitMask == 0 || (LaneMask & UnitMask) != 0)
+ RegUnitsAvailable.reset((*RUI).first);
+ }
+}
+
+void RegScavenger::initRegState() {
+ for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = nullptr;
+ }
+
+ // All register units start out unused.
+ RegUnitsAvailable.set();
+
+ // Live-in registers are in use.
+ for (const auto &LI : MBB->liveins())
+ setRegUsed(LI.PhysReg, LI.LaneMask);
+
+ // Pristine CSRs are also unavailable.
+ const MachineFunction &MF = *MBB->getParent();
+ BitVector PR = MF.getFrameInfo()->getPristineRegs(MF);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setRegUsed(I);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
+ "Target changed?");
+
+ // It is not possible to use the register scavenger after late optimization
+ // passes that don't preserve accurate liveness information.
+ assert(MRI->tracksLiveness() &&
+ "Cannot use register scavenger with inaccurate liveness");
+
+ // Self-initialize.
+ if (!this->MBB) {
+ NumRegUnits = TRI->getNumRegUnits();
+ RegUnitsAvailable.resize(NumRegUnits);
+ KillRegUnits.resize(NumRegUnits);
+ DefRegUnits.resize(NumRegUnits);
+ TmpRegUnits.resize(NumRegUnits);
+ }
+ this->MBB = &MBB;
+
+ initRegState();
+
+ Tracking = false;
+}
+
+void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ BV.set(*RUI);
+}
+
+void RegScavenger::determineKillsAndDefs() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ MachineInstr &MI = *MBBI;
+ assert(!MI.isDebugValue() && "Debug values have no kills or defs");
+
+ // Find out which registers are early clobbered, killed, defined, and marked
+ // def-dead in this instruction.
+ KillRegUnits.reset();
+ DefRegUnits.reset();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ TmpRegUnits.clear();
+ for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) {
+ for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
+ if (MO.clobbersPhysReg(*RURI)) {
+ TmpRegUnits.set(RU);
+ break;
+ }
+ }
+ }
+
+ // Apply the mask.
+ KillRegUnits |= TmpRegUnits;
+ }
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
+ continue;
+
+ if (MO.isUse()) {
+ // Ignore undef uses.
+ if (MO.isUndef())
+ continue;
+ if (MO.isKill())
+ addRegUnits(KillRegUnits, Reg);
+ } else {
+ assert(MO.isDef());
+ if (MO.isDead())
+ addRegUnits(KillRegUnits, Reg);
+ else
+ addRegUnits(DefRegUnits, Reg);
+ }
+ }
+}
+
+void RegScavenger::unprocess() {
+ assert(Tracking && "Cannot unprocess because we're not tracking");
+
+ MachineInstr &MI = *MBBI;
+ if (!MI.isDebugValue()) {
+ determineKillsAndDefs();
+
+ // Commit the changes.
+ setUsed(KillRegUnits);
+ setUnused(DefRegUnits);
+ }
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(nullptr);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
+void RegScavenger::forward() {
+ // Move ptr forward.
+ if (!Tracking) {
+ MBBI = MBB->begin();
+ Tracking = true;
+ } else {
+ assert(MBBI != MBB->end() && "Already past the end of the basic block!");
+ MBBI = std::next(MBBI);
+ }
+ assert(MBBI != MBB->end() && "Already at the end of the basic block!");
+
+ MachineInstr &MI = *MBBI;
+
+ for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ if (I->Restore != &MI)
+ continue;
+
+ I->Reg = 0;
+ I->Restore = nullptr;
+ }
+
+ if (MI.isDebugValue())
+ return;
+
+ determineKillsAndDefs();
+
+ // Verify uses and defs.
+#ifndef NDEBUG
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
+ continue;
+ if (MO.isUse()) {
+ if (MO.isUndef())
+ continue;
+ if (!isRegUsed(Reg)) {
+ // Check if it's partially live: e.g.
+ // D0 = insert_subreg D0<undef>, S0
+ // ... D0
+ // The problem is the insert_subreg could be eliminated. The use of
+ // D0 is reading a partially undef value. This is not *incorrect* since
+ // S1 can be freely clobbered.
+ // Ideally we would like a way to model this, but leaving the
+ // insert_subreg around causes both correctness and performance issues.
+ bool SubUsed = false;
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ if (isRegUsed(*SubRegs)) {
+ SubUsed = true;
+ break;
+ }
+ bool SuperUsed = false;
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
+ if (isRegUsed(*SR)) {
+ SuperUsed = true;
+ break;
+ }
+ }
+ if (!SubUsed && !SuperUsed) {
+ MBB->getParent()->verify(nullptr, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
+ (void)SubUsed;
+ (void)SuperUsed;
+ }
+ } else {
+ assert(MO.isDef());
+#if 0
+ // FIXME: Enable this once we've figured out how to correctly transfer
+ // implicit kills during codegen passes like the coalescer.
+ assert((KillRegs.test(Reg) || isUnused(Reg) ||
+ isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) &&
+ "Re-defining a live register!");
+#endif
+ }
+ }
+#endif // NDEBUG
+
+ // Commit the changes.
+ setUnused(KillRegUnits);
+ setUsed(DefRegUnits);
+}
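+
+// A minimal driver sketch, assuming a client pass that walks a block forward;
+// `SomeReg` is a placeholder register:
+//
+//   RegScavenger RS;
+//   RS.enterBasicBlock(MBB);
+//   for (MachineInstr &MI : MBB) {
+//     RS.forward();             // account for MI's kills and defs
+//     if (!RS.isRegUsed(SomeReg))
+//       ;                       // SomeReg is free after MI
+//   }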
+
+bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
+ if (includeReserved && isReserved(Reg))
+ return true;
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ if (!RegUnitsAvailable.test(*RUI))
+ return true;
+ return false;
+}
+
+unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (unsigned Reg : *RC) {
+ if (!isRegUsed(Reg)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) <<
+ "\n");
+ return Reg;
+ }
+ }
+ return 0;
+}
+
+BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
+ BitVector Mask(TRI->getNumRegs());
+ for (unsigned Reg : *RC)
+ if (!isRegUsed(Reg))
+ Mask.set(Reg);
+ return Mask;
+}
+
+unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+ BitVector &Candidates,
+ unsigned InstrLimit,
+ MachineBasicBlock::iterator &UseMI) {
+ int Survivor = Candidates.find_first();
+ assert(Survivor > 0 && "No candidates for scavenging");
+
+ MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
+ assert(StartMI != ME && "MI already at terminator");
+ MachineBasicBlock::iterator RestorePointMI = StartMI;
+ MachineBasicBlock::iterator MI = StartMI;
+
+ bool inVirtLiveRange = false;
+ for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
+ bool isVirtKillInsn = false;
+ bool isVirtDefInsn = false;
+ // Remove any candidates touched by instruction.
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isDef())
+ isVirtDefInsn = true;
+ else if (MO.isKill())
+ isVirtKillInsn = true;
+ continue;
+ }
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
+ }
+ // If we're not in a virtual reg's live range, this is a valid
+ // restore point.
+ if (!inVirtLiveRange) RestorePointMI = MI;
+
+ // Update whether we're in the live range of a virtual register
+ if (isVirtKillInsn) inVirtLiveRange = false;
+ if (isVirtDefInsn) inVirtLiveRange = true;
+
+ // Was our survivor untouched by this instruction?
+ if (Candidates.test(Survivor))
+ continue;
+
+ // All candidates gone?
+ if (Candidates.none())
+ break;
+
+ Survivor = Candidates.find_first();
+ }
+ // If we ran off the end, that's where we want to restore.
+ if (MI == ME) RestorePointMI = ME;
+ assert(RestorePointMI != StartMI &&
+ "No available scavenger restore location!");
+
+ // We ran out of candidates, so stop the search.
+ UseMI = RestorePointMI;
+ return Survivor;
+}
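+
+// Worked example: with candidates {R1, R2} and InstrLimit = 25, if R1 is
+// clobbered two instructions below StartMI while R2 stays untouched, R1 is
+// dropped when that clobber is seen and R2 survives; UseMI is set to the last
+// point reached that does not sit inside a virtual register's live range,
+// which is where the scavenged value would have to be restored.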
+
+static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ return i;
+}
+
+unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+ MachineBasicBlock::iterator I,
+ int SPAdj) {
+ MachineInstr &MI = *I;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates = TRI->getAllocatableSet(MF, RC);
+
+ // Exclude all the registers being used by the instruction.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ BitVector Available = getRegsAvailable(RC);
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
+
+ // Find the register whose use is furthest away.
+ MachineBasicBlock::iterator UseMI;
+ unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+
+ // If we found an unused register there is no reason to spill it.
+ if (!isRegUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
+ return SReg;
+ }
+
+ // Find an available scavenging slot with size and alignment matching
+ // the requirements of the class RC.
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned NeedSize = RC->getSize();
+ unsigned NeedAlign = RC->getAlignment();
+
+ unsigned SI = Scavenged.size(), Diff = UINT_MAX;
+ int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
+ for (unsigned I = 0; I < Scavenged.size(); ++I) {
+ if (Scavenged[I].Reg != 0)
+ continue;
+ // Verify that this slot is valid for this register.
+ int FI = Scavenged[I].FrameIndex;
+ if (FI < FIB || FI >= FIE)
+ continue;
+ unsigned S = MFI.getObjectSize(FI);
+ unsigned A = MFI.getObjectAlignment(FI);
+ if (NeedSize > S || NeedAlign > A)
+ continue;
+ // Avoid wasting slots with large size and/or large alignment. Pick one
+ // that is the best fit for this register class (in street metric).
+ // Picking a larger slot than necessary could happen if a slot for a
+ // larger register is reserved before a slot for a smaller one. When
+ // trying to spill a smaller register, the large slot would be found
+ // first, thus making it impossible to spill the larger register later.
+ unsigned D = (S-NeedSize) + (A-NeedAlign);
+ if (D < Diff) {
+ SI = I;
+ Diff = D;
+ }
+ }
+
+ if (SI == Scavenged.size()) {
+ // We need to scavenge a register but have no spill slot; the target
+ // must know how to do it (if not, we'll assert below).
+ Scavenged.push_back(ScavengedInfo(FIE));
+ }
+
+ // Avoid infinite regress
+ Scavenged[SI].Reg = SReg;
+
+ // If the target knows how to save/restore the register, let it do so;
+ // otherwise, use the emergency stack spill slot.
+ if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
+ // Spill the scavenged register before I.
+ int FI = Scavenged[SI].FrameIndex;
+ if (FI < FIB || FI >= FIE) {
+ std::string Msg = std::string("Error while trying to spill ") +
+ TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) +
+ ": Cannot scavenge register without an emergency spill slot!";
+ report_fatal_error(Msg.c_str());
+ }
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ MachineBasicBlock::iterator II = std::prev(I);
+
+ unsigned FIOperandNum = getFrameIndexOperandNum(*II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+
+ // Restore the scavenged register before its use (or first terminator).
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
+ RC, TRI);
+ II = std::prev(UseMI);
+
+ FIOperandNum = getFrameIndexOperandNum(*II);
+ TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
+ }
+
+ Scavenged[SI].Restore = &*std::prev(UseMI);
+
+ // Doing this here leads to infinite regress.
+ // Scavenged[SI].Reg = SReg;
+
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
+ return SReg;
+}
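+
+// A rough usage sketch, assuming a target's frame-index elimination that
+// needs a scratch register; `RC`, `AddOpc`, `BaseReg` and `Offset` are
+// placeholders and the real choices are target specific:
+//
+//   unsigned Scratch = RS->scavengeRegister(RC, II, SPAdj);
+//   BuildMI(MBB, II, DL, TII->get(AddOpc), Scratch)
+//       .addReg(BaseReg).addImm(Offset);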
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
new file mode 100644
index 000000000000..5cf3e57eb3d3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -0,0 +1,93 @@
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+cl::opt<bool> DumpRegUsage(
+ "print-regusage", cl::init(false), cl::Hidden,
+ cl::desc("print register usage details collected for analysis."));
+
+INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
+ "Register Usage Informartion Stroage", false, true)
+
+char PhysicalRegisterUsageInfo::ID = 0;
+
+void PhysicalRegisterUsageInfo::anchor() {}
+
+bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
+ RegMasks.grow(M.size());
+ return false;
+}
+
+bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
+ if (DumpRegUsage)
+ print(errs());
+
+ RegMasks.shrink_and_clear();
+ return false;
+}
+
+void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
+ const Function *FP, std::vector<uint32_t> RegMask) {
+ assert(FP != nullptr && "Function * can't be nullptr.");
+ RegMasks[FP] = std::move(RegMask);
+}
+
+const std::vector<uint32_t> *
+PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
+ auto It = RegMasks.find(FP);
+ if (It != RegMasks.end())
+ return &(It->second);
+ return nullptr;
+}
+
+void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
+ const TargetRegisterInfo *TRI;
+
+ typedef std::pair<const Function *, std::vector<uint32_t>> FuncPtrRegMaskPair;
+
+ SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
+
+ // Create a vector of pointers to the RegMasks entries.
+ for (const auto &RegMask : RegMasks)
+ FPRMPairVector.push_back(&RegMask);
+
+ // Sort the vector to print the analysis in alphabetical order of function name.
+ std::sort(
+ FPRMPairVector.begin(), FPRMPairVector.end(),
+ [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
+ return A->first->getName() < B->first->getName();
+ });
+
+ for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) {
+ OS << FPRMPair->first->getName() << " "
+ << "Clobbered Registers: ";
+ TRI = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
+ .getRegisterInfo();
+
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
+ OS << TRI->getName(PReg) << " ";
+ }
+ OS << "\n";
+ }
+}
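+
+// A rough flow sketch, assuming a collector pass and an interprocedural
+// consumer; `PRUI`, `F`, `RegMask` and `Callee` are placeholder names:
+//
+//   PRUI.storeUpdateRegUsageInfo(&F, std::move(RegMask));
+//   ...
+//   if (const std::vector<uint32_t> *Mask = PRUI.getRegUsageInfo(&Callee))
+//     ; // use *Mask as a more precise clobber mask for calls to Callee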
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
new file mode 100644
index 000000000000..ea952d9088fc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -0,0 +1,388 @@
+//===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// The rename-independent-subregs pass looks for virtual registers with
+/// independently used subregisters and renames them to new virtual registers.
+/// Example: In the following:
+/// %vreg0:sub0<read-undef> = ...
+/// %vreg0:sub1 = ...
+/// use %vreg0:sub0
+/// %vreg0:sub0 = ...
+/// use %vreg0:sub0
+/// use %vreg0:sub1
+/// sub0 and sub1 are never used together, and we have two independent sub0
+/// definitions. This pass will rename to:
+/// %vreg0:sub0<read-undef> = ...
+/// %vreg1:sub1<read-undef> = ...
+/// use %vreg1:sub1
+/// %vreg2:sub1<read-undef> = ...
+/// use %vreg2:sub1
+/// use %vreg0:sub0
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeUtils.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "rename-independent-subregs"
+
+namespace {
+
+class RenameIndependentSubregs : public MachineFunctionPass {
+public:
+ static char ID;
+ RenameIndependentSubregs() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "Rename Disconnected Subregister Components";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ struct SubRangeInfo {
+ ConnectedVNInfoEqClasses ConEQ;
+ LiveInterval::SubRange *SR;
+ unsigned Index;
+
+ SubRangeInfo(LiveIntervals &LIS, LiveInterval::SubRange &SR,
+ unsigned Index)
+ : ConEQ(LIS), SR(&SR), Index(Index) {}
+ };
+
+ /// Split unrelated subregister components and rename them to new vregs.
+ bool renameComponents(LiveInterval &LI) const;
+
+ /// \brief Build a vector of SubRange infos and a union find set of
+ /// equivalence classes.
+ /// Returns true if more than 1 equivalence class was found.
+ bool findComponents(IntEqClasses &Classes,
+ SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ LiveInterval &LI) const;
+
+ /// \brief Distribute the LiveInterval segments into the new LiveIntervals
+ /// belonging to their class.
+ void distribute(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+ /// \brief Constructs main liverange and add missing undef+dead flags.
+ void computeMainRangesFixFlags(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+ /// Rewrite Machine Operands to use the new vreg belonging to their class.
+ void rewriteOperands(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+
+ LiveIntervals *LIS;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+};
+
+} // end anonymous namespace
+
+char RenameIndependentSubregs::ID;
+
+char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
+
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs",
+ "Rename Independent Subregisters", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs",
+ "Rename Independent Subregisters", false, false)
+
+bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
+ // Shortcut: We cannot have split components with a single definition.
+ if (LI.valnos.size() < 2)
+ return false;
+
+ SmallVector<SubRangeInfo, 4> SubRangeInfos;
+ IntEqClasses Classes;
+ if (!findComponents(Classes, SubRangeInfos, LI))
+ return false;
+
+ // Create a new VReg for each class.
+ unsigned Reg = LI.reg;
+ const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+ SmallVector<LiveInterval*, 4> Intervals;
+ Intervals.push_back(&LI);
+ DEBUG(dbgs() << PrintReg(Reg) << ": Found " << Classes.getNumClasses()
+ << " equivalence classes.\n");
+ DEBUG(dbgs() << PrintReg(Reg) << ": Splitting into newly created:");
+ for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
+ ++I) {
+ unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
+ Intervals.push_back(&NewLI);
+ DEBUG(dbgs() << ' ' << PrintReg(NewVReg));
+ }
+ DEBUG(dbgs() << '\n');
+
+ rewriteOperands(Classes, SubRangeInfos, Intervals);
+ distribute(Classes, SubRangeInfos, Intervals);
+ computeMainRangesFixFlags(Classes, SubRangeInfos, Intervals);
+ return true;
+}
+
+bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
+ SmallVectorImpl<RenameIndependentSubregs::SubRangeInfo> &SubRangeInfos,
+ LiveInterval &LI) const {
+ // First step: Create connected components for the VNInfos inside the
+ // subranges and count the global number of such components.
+ unsigned NumComponents = 0;
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRangeInfos.push_back(SubRangeInfo(*LIS, SR, NumComponents));
+ ConnectedVNInfoEqClasses &ConEQ = SubRangeInfos.back().ConEQ;
+
+ unsigned NumSubComponents = ConEQ.Classify(SR);
+ NumComponents += NumSubComponents;
+ }
+ // Shortcut: With only 1 subrange, the normal separate component tests are
+ // enough and we do not need to perform the union-find on the subregister
+ // segments.
+ if (SubRangeInfos.size() < 2)
+ return false;
+
+ // Next step: Build union-find structure over all subranges and merge classes
+ // across subranges when they are affected by the same MachineOperand.
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ Classes.grow(NumComponents);
+ unsigned Reg = LI.reg;
+ for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+ unsigned MergedID = ~0u;
+ for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) {
+ const LiveInterval::SubRange &SR = *SRInfo.SR;
+ if ((SR.LaneMask & LaneMask) == 0)
+ continue;
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+ : Pos.getBaseIndex();
+ const VNInfo *VNI = SR.getVNInfoAt(Pos);
+ if (VNI == nullptr)
+ continue;
+
+ // Map to the local representative ID.
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
+ // Global ID
+ unsigned ID = LocalID + SRInfo.Index;
+ // Merge other sets
+ MergedID = MergedID == ~0u ? ID : Classes.join(MergedID, ID);
+ }
+ }
+
+ // Early exit if we ended up with a single equivalence class.
+ Classes.compress();
+ unsigned NumClasses = Classes.getNumClasses();
+ return NumClasses > 1;
+}
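+
+// Worked example: suppose sub0 contributes local classes {0,1} (Index 0) and
+// sub1 contributes {0} (Index 2). An operand that touches a class-1 value in
+// sub0 and the class-0 value in sub1 joins global IDs 1 and 2, leaving two
+// equivalence classes {0} and {1,2}, so the function returns true and the
+// vreg is split in two.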
+
+void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+ unsigned Reg = Intervals[0]->reg;
+ for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+ E = MRI->reg_nodbg_end(); I != E; ) {
+ MachineOperand &MO = *I++;
+ if (!MO.isDef() && !MO.readsReg())
+ continue;
+
+ MachineInstr &MI = *MO.getParent();
+
+ SlotIndex Pos = LIS->getInstructionIndex(MI);
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+
+ unsigned ID = ~0u;
+ for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+ const LiveInterval::SubRange &SR = *SRInfo.SR;
+ if ((SR.LaneMask & LaneMask) == 0)
+ continue;
+ LiveRange::const_iterator I = SR.find(Pos);
+ if (I == SR.end())
+ continue;
+
+ const VNInfo &VNI = *I->valno;
+ // Map to the local representative ID.
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ // Global ID
+ ID = Classes[LocalID + SRInfo.Index];
+ break;
+ }
+
+ unsigned VReg = Intervals[ID]->reg;
+ MO.setReg(VReg);
+ }
+ // TODO: We could attempt to recompute new register classes while visiting
+ // the operands: some of the split registers may get by with less constrained
+ // classes than the original vreg.
+}
+
+void RenameIndependentSubregs::distribute(const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ unsigned NumClasses = Classes.getNumClasses();
+ SmallVector<unsigned, 8> VNIMapping;
+ SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+ LiveInterval::SubRange &SR = *SRInfo.SR;
+ unsigned NumValNos = SR.valnos.size();
+ VNIMapping.clear();
+ VNIMapping.reserve(NumValNos);
+ SubRanges.clear();
+ SubRanges.resize(NumClasses-1, nullptr);
+ for (unsigned I = 0; I < NumValNos; ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ unsigned ID = Classes[LocalID + SRInfo.Index];
+ VNIMapping.push_back(ID);
+ if (ID > 0 && SubRanges[ID-1] == nullptr)
+ SubRanges[ID-1] = Intervals[ID]->createSubRange(Allocator, SR.LaneMask);
+ }
+ DistributeRange(SR, SubRanges.data(), VNIMapping);
+ }
+}
+
+static bool subRangeLiveAt(const LiveInterval &LI, SlotIndex Pos) {
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if (SR.liveAt(Pos))
+ return true;
+ }
+ return false;
+}
+
+void RenameIndependentSubregs::computeMainRangesFixFlags(
+ const IntEqClasses &Classes,
+ const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+ const SmallVectorImpl<LiveInterval*> &Intervals) const {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+ for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
+ LiveInterval &LI = *Intervals[I];
+ unsigned Reg = LI.reg;
+
+ LI.removeEmptySubRanges();
+
+ // There must be a def (or live-in) before every use. Splitting vregs may
+ // violate this principle, as the split vreg may not have a definition on
+ // every path. Fix this by creating IMPLICIT_DEF instructions as necessary.
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ // Search for "PHI" value numbers in the subranges. We must find a live
+ // value in each predecessor block and add an IMPLICIT_DEF where it is
+ // missing.
+ for (unsigned I = 0; I < SR.valnos.size(); ++I) {
+ const VNInfo &VNI = *SR.valnos[I];
+ if (VNI.isUnused() || !VNI.isPHIDef())
+ continue;
+
+ SlotIndex Def = VNI.def;
+ MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(Def);
+ for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ SlotIndex PredEnd = Indexes.getMBBEndIdx(PredMBB);
+ if (subRangeLiveAt(LI, PredEnd.getPrevSlot()))
+ continue;
+
+ MachineBasicBlock::iterator InsertPos =
+ llvm::findPHICopyInsertPoint(PredMBB, &MBB, Reg);
+ const MCInstrDesc &MCDesc = TII->get(TargetOpcode::IMPLICIT_DEF);
+ MachineInstrBuilder ImpDef = BuildMI(*PredMBB, InsertPos,
+ DebugLoc(), MCDesc, Reg);
+ SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
+ SlotIndex RegDefIdx = DefIdx.getRegSlot();
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
+ SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
+ }
+ }
+ }
+ }
+
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ if (!MO.isDef())
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx == 0)
+ continue;
+ // After assigning the new vreg, other sublanes may no longer be live
+ // into and out of the instruction. We need to add new dead and undef
+ // flags in these cases.
+ if (!MO.isUndef()) {
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ if (!subRangeLiveAt(LI, Pos))
+ MO.setIsUndef();
+ }
+ if (!MO.isDead()) {
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()).getDeadSlot();
+ if (!subRangeLiveAt(LI, Pos))
+ MO.setIsDead();
+ }
+ }
+
+ if (I == 0)
+ LI.clear();
+ LIS->constructMainRangeFromSubranges(LI);
+ }
+}
+
+bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
+ // Skip renaming if liveness of subregister is not tracked.
+ if (!MF.getSubtarget().enableSubRegLiveness())
+ return false;
+
+ DEBUG(dbgs() << "Renaming independent subregister live ranges in "
+ << MF.getName() << '\n');
+
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+
+ // Iterate over all vregs. Note that we only query getNumVirtRegs() once:
+ // the newly created vregs end up with higher numbers and do not need to be
+ // visited, as there can't be any further splitting.
+ bool Changed = false;
+ for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (!LIS->hasInterval(Reg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (!LI.hasSubRanges())
+ continue;
+
+ Changed |= renameComponents(LI);
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
new file mode 100644
index 000000000000..19cd59b9dba7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -0,0 +1,851 @@
+//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass splits the stack into the safe stack (kept as-is for LLVM backend)
+// and the unsafe stack (explicitly allocated and managed through the runtime
+// support library).
+//
+// http://clang.llvm.org/docs/SafeStack.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackColoring.h"
+#include "SafeStackLayout.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestack"
+
+enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
+
+static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
+ cl::Hidden, cl::init(ThreadLocalUSP),
+ cl::desc("Type of storage for the unsafe stack pointer"),
+ cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
+ "Thread-local storage"),
+ clEnumValN(SingleThreadUSP, "single-thread",
+ "Non-thread-local storage"),
+ clEnumValEnd));
+
+namespace llvm {
+
+STATISTIC(NumFunctions, "Total number of functions");
+STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack");
+STATISTIC(NumUnsafeStackRestorePointsFunctions,
+ "Number of functions that use setjmp or exceptions");
+
+STATISTIC(NumAllocas, "Total number of allocas");
+STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
+STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
+STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
+
+} // namespace llvm
+
+namespace {
+
+/// Rewrite an SCEV expression for a memory access address to an expression
+/// that represents the offset from the given alloca.
+///
+/// The implementation simply replaces all mentions of the alloca with zero.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
+
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
+ }
+};
+
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
+class SafeStack : public FunctionPass {
+ const TargetMachine *TM;
+ const TargetLoweringBase *TL;
+ const DataLayout *DL;
+ ScalarEvolution *SE;
+
+ Type *StackPtrTy;
+ Type *IntPtrTy;
+ Type *Int32Ty;
+ Type *Int8Ty;
+
+ Value *UnsafeStackPtr = nullptr;
+
+ /// Unsafe stack alignment. Each stack frame must ensure that the stack is
+ /// aligned to this value. We need to re-align the unsafe stack if the
+ /// alignment of any object on the stack exceeds this value.
+ ///
+ /// 16 seems like a reasonable upper bound on the alignment of objects that we
+ /// might expect to appear on the stack on most common targets.
+ enum { StackAlignment = 16 };
+
+ /// \brief Build a value representing a pointer to the unsafe stack pointer.
+ Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
+
+ /// \brief Return the value of the stack canary.
+ Value *getStackGuard(IRBuilder<> &IRB, Function &F);
+
+ /// \brief Load stack guard from the frame and check if it has changed.
+ void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+ AllocaInst *StackGuardSlot, Value *StackGuard);
+
+ /// \brief Find all static allocas, dynamic allocas, return instructions and
+ /// stack restore points (exception unwind blocks and setjmp calls) in the
+ /// given function and append them to the respective vectors.
+ void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints);
+
+ /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+ /// size can not be statically determined.
+ uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
+ /// \brief Allocate space for all static allocas in \p StaticAllocas,
+ /// replace allocas with pointers into the unsafe stack and generate code to
+ /// restore the stack pointer before all return instructions in \p Returns.
+ ///
+ /// \returns A pointer to the top of the unsafe stack after all unsafe static
+ /// allocas are allocated.
+ Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
+ ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments,
+ ArrayRef<ReturnInst *> Returns,
+ Instruction *BasePointer,
+ AllocaInst *StackGuardSlot);
+
+ /// \brief Generate code to restore the stack after all stack restore points
+ /// in \p StackRestorePoints.
+ ///
+ /// \returns A local variable in which to maintain the dynamic top of the
+ /// unsafe stack if needed.
+ AllocaInst *
+ createStackRestorePoints(IRBuilder<> &IRB, Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop);
+
+ /// \brief Replace all allocas in \p DynamicAllocas with code to allocate
+ /// space dynamically on the unsafe stack and store the dynamic unsafe stack
+ /// top to \p DynamicTop if non-null.
+ void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr,
+ AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas);
+
+ bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+ bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr, uint64_t AllocaSize);
+ bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+ uint64_t AllocaSize);
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ SafeStack(const TargetMachine *TM)
+ : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
+ initializeSafeStackPass(*PassRegistry::getPassRegistry());
+ }
+ SafeStack() : SafeStack(nullptr) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ }
+
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+
+ StackPtrTy = Type::getInt8PtrTy(M.getContext());
+ IntPtrTy = DL->getIntPtrType(M.getContext());
+ Int32Ty = Type::getInt32Ty(M.getContext());
+ Int8Ty = Type::getInt8Ty(M.getContext());
+
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override;
+}; // class SafeStack
+
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+ uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
+ const Value *AllocaPtr, uint64_t AllocaSize) {
+ AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+
+ uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+ ConstantRange SizeRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ ConstantRange AllocaRange =
+ ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+ bool Safe = AllocaRange.contains(AccessRange);
+
+ DEBUG(dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << " Access " << *Addr << "\n"
+ << " SCEV " << *Expr
+ << " U: " << SE->getUnsignedRange(Expr)
+ << ", S: " << SE->getSignedRange(Expr) << "\n"
+ << " Range " << AccessRange << "\n"
+ << " AllocaRange " << AllocaRange << "\n"
+ << " " << (Safe ? "safe" : "unsafe") << "\n");
+
+ return Safe;
+}
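+
+// Numeric example: for a 40-byte alloca, AllocaRange is [0, 40). An 8-byte
+// access whose rewritten start offset is known to lie in [8, 16) touches at
+// most bytes 8..22, so its AccessRange stays inside AllocaRange and the
+// access is safe; a start offset in [36, 37) could touch byte 43, escaping
+// the alloca, so the access is reported unsafe.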
+
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr,
+ uint64_t AllocaSize) {
+ // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+ if (MI->getRawDest() != U) return true;
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len) return false;
+ return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation must be put on the safe
+/// stack or not. The function analyzes all uses of AI and checks whether it is
+/// only accessed in a memory safe way (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+ // Go through all uses of this alloca and check whether all accesses to the
+ // allocated object are statically known to be memory safe and, hence, the
+ // object can be placed on the safe stack.
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(AllocaPtr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+ AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n store of address: " << *I << "\n");
+ return false;
+ }
+
+ if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+ AllocaPtr, AllocaSize))
+ return false;
+ break;
+ }
+ case Instruction::Ret: {
+ // Information leak.
+ return false;
+ }
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe memintrinsic: " << *I
+ << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ // LLVM 'nocapture' attribute is only set for arguments whose address
+ // is not stored, passed around, or used in any other non-trivial way.
+ // We assume that passing a pointer to an object as a 'nocapture
+ // readnone' argument is safe.
+ // FIXME: a more precise solution would require an interprocedural
+ // analysis here, which would look at all uses of an argument inside
+ // the function being called.
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+ if (A->get() == V)
+ if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+ CS.doesNotAccessMemory()))) {
+ DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+ << "\n unsafe call: " << *I << "\n");
+ return false;
+ }
+ continue;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ // All uses of the alloca are safe, we can place it on the safe stack.
+ return true;
+}
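+
+// Illustrative outcomes: an alloca that is only read and written through
+// statically in-bounds offsets passes every IsAccessSafe check and stays on
+// the safe stack, while an alloca whose address is returned, stored, or
+// passed to a callee without nocapture/readnone guarantees fails here and is
+// moved to the unsafe stack.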
+
+Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
+ // Check if there is a target-specific location for the unsafe stack pointer.
+ if (TL)
+ if (Value *V = TL->getSafeStackPointerLocation(IRB))
+ return V;
+
+ // Otherwise, assume the target links with compiler-rt, which provides a
+ // thread-local variable with a magic name.
+ Module &M = *F.getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+ auto UnsafeStackPtr =
+ dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
+
+ bool UseTLS = USPStorage == ThreadLocalUSP;
+
+ if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
+ // The global variable is not defined yet, define it ourselves.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
+ UnsafeStackPtr = new GlobalVariable(
+ M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
+ } else {
+ // The variable exists, check its type and attributes.
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
+ }
+ return UnsafeStackPtr;
+}
+
+Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
+ Value *StackGuardVar = nullptr;
+ if (TL)
+ StackGuardVar = TL->getIRStackGuard(IRB);
+ if (!StackGuardVar)
+ StackGuardVar =
+ F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
+ return IRB.CreateLoad(StackGuardVar, "StackGuard");
+}
+
+void SafeStack::findInsts(Function &F,
+ SmallVectorImpl<AllocaInst *> &StaticAllocas,
+ SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+ SmallVectorImpl<Argument *> &ByValArguments,
+ SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &StackRestorePoints) {
+ for (Instruction &I : instructions(&F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ ++NumAllocas;
+
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (IsSafeStackAlloca(AI, Size))
+ continue;
+
+ if (AI->isStaticAlloca()) {
+ ++NumUnsafeStaticAllocas;
+ StaticAllocas.push_back(AI);
+ } else {
+ ++NumUnsafeDynamicAllocas;
+ DynamicAllocas.push_back(AI);
+ }
+ } else if (auto RI = dyn_cast<ReturnInst>(&I)) {
+ Returns.push_back(RI);
+ } else if (auto CI = dyn_cast<CallInst>(&I)) {
+ // setjmps require stack restore.
+ if (CI->getCalledFunction() && CI->canReturnTwice())
+ StackRestorePoints.push_back(CI);
+ } else if (auto LP = dyn_cast<LandingPadInst>(&I)) {
+ // Exception landing pads require stack restore.
+ StackRestorePoints.push_back(LP);
+ } else if (auto II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::gcroot)
+ llvm::report_fatal_error(
+ "gcroot intrinsic not compatible with safestack attribute");
+ }
+ }
+ for (Argument &Arg : F.args()) {
+ if (!Arg.hasByValAttr())
+ continue;
+ uint64_t Size =
+ DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+ if (IsSafeStackAlloca(&Arg, Size))
+ continue;
+
+ ++NumUnsafeByValArguments;
+ ByValArguments.push_back(&Arg);
+ }
+}
+
+AllocaInst *
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
+ ArrayRef<Instruction *> StackRestorePoints,
+ Value *StaticTop, bool NeedDynamicTop) {
+ assert(StaticTop && "The stack top isn't set.");
+
+ if (StackRestorePoints.empty())
+ return nullptr;
+
+ // We need the current value of the shadow stack pointer to restore
+ // after longjmp or exception catching.
+
+ // FIXME: On some platforms this could be handled by the longjmp/exception
+ // runtime itself.
+
+ AllocaInst *DynamicTop = nullptr;
+ if (NeedDynamicTop) {
+ // If we also have dynamic allocas, the stack pointer value changes
+ // throughout the function. For now we store it in an alloca.
+ DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr,
+ "unsafe_stack_dynamic_ptr");
+ IRB.CreateStore(StaticTop, DynamicTop);
+ }
+
+ // Restore current stack pointer after longjmp/exception catch.
+ for (Instruction *I : StackRestorePoints) {
+ ++NumUnsafeStackRestorePoints;
+
+ IRB.SetInsertPoint(I->getNextNode());
+ Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
+ IRB.CreateStore(CurrentTop, UnsafeStackPtr);
+ }
+
+ return DynamicTop;
+}
+
+void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+ AllocaInst *StackGuardSlot, Value *StackGuard) {
+ Value *V = IRB.CreateLoad(StackGuardSlot);
+ Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
+
+ auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb = BranchProbabilityInfo::getBranchProbStackProtector(false);
+ MDNode *Weights = MDBuilder(F.getContext())
+ .createBranchWeights(SuccessProb.getNumerator(),
+ FailureProb.getNumerator());
+ Instruction *CheckTerm =
+ SplitBlockAndInsertIfThen(Cmp, &RI,
+ /* Unreachable */ true, Weights);
+ IRBuilder<> IRBFail(CheckTerm);
+ // FIXME: respect -fsanitize-trap / -ftrap-function here?
+ Constant *StackChkFail = F.getParent()->getOrInsertFunction(
+ "__stack_chk_fail", IRB.getVoidTy(), nullptr);
+ IRBFail.CreateCall(StackChkFail, {});
+}
+
+/// We explicitly compute and set the unsafe stack layout for all unsafe
+/// static alloca instructions. We save the unsafe "base pointer" in the
+/// prologue into a local variable and restore it in the epilogue.
+Value *SafeStack::moveStaticAllocasToUnsafeStack(
+ IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
+ ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns,
+ Instruction *BasePointer, AllocaInst *StackGuardSlot) {
+ if (StaticAllocas.empty() && ByValArguments.empty())
+ return BasePointer;
+
+ DIBuilder DIB(*F.getParent());
+
+ StackColoring SSC(F, StaticAllocas);
+ SSC.run();
+ SSC.removeAllMarkers();
+
+ // Unsafe stack always grows down.
+ StackLayout SSL(StackAlignment);
+ if (StackGuardSlot) {
+ Type *Ty = StackGuardSlot->getAllocatedType();
+ unsigned Align =
+ std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+ SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
+ Align, SSC.getLiveRange(StackGuardSlot));
+ }
+
+ for (Argument *Arg : ByValArguments) {
+ Type *Ty = Arg->getType()->getPointerElementType();
+ uint64_t Size = DL->getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ Arg->getParamAlignment());
+ SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
+ }
+
+ for (AllocaInst *AI : StaticAllocas) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ // Ensure the object is properly aligned.
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+
+ SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI));
+ }
+
+ SSL.computeLayout();
+ unsigned FrameAlignment = SSL.getFrameAlignment();
+
+ // FIXME: tell SSL that we start at a less-than-MaxAlignment aligned location
+ // (AlignmentSkew).
+ if (FrameAlignment > StackAlignment) {
+ // Re-align the base pointer according to the max requested alignment.
+ assert(isPowerOf2_32(FrameAlignment));
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+ BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
+ IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+ ConstantInt::get(IntPtrTy, ~uint64_t(FrameAlignment - 1))),
+ StackPtrTy));
+ }
+
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ if (StackGuardSlot) {
+ unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *NewAI =
+ IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
+
+ // Replace alloc with the new location.
+ StackGuardSlot->replaceAllUsesWith(NewAI);
+ StackGuardSlot->eraseFromParent();
+ }
+
+ for (Argument *Arg : ByValArguments) {
+ unsigned Offset = SSL.getObjectOffset(Arg);
+ Type *Ty = Arg->getType()->getPointerElementType();
+
+ uint64_t Size = DL->getTypeStoreSize(Ty);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
+ Arg->getName() + ".unsafe-byval");
+
+ // Replace alloc with the new location.
+ replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
+ /*Deref=*/true, -Offset);
+ Arg->replaceAllUsesWith(NewArg);
+ IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
+ IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
+ }
+
+ // Allocate space for every unsafe static AllocaInst on the unsafe stack.
+ for (AllocaInst *AI : StaticAllocas) {
+ IRB.SetInsertPoint(AI);
+ unsigned Offset = SSL.getObjectOffset(AI);
+
+ uint64_t Size = getStaticAllocaAllocationSize(AI);
+ if (Size == 0)
+ Size = 1; // Don't create zero-sized stack objects.
+
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset);
+ replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
+
+ // Replace uses of the alloca with the new location.
+ // Insert address calculation close to each use to work around PR27844.
+ std::string Name = std::string(AI->getName()) + ".unsafe";
+ while (!AI->use_empty()) {
+ Use &U = *AI->use_begin();
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Instruction *InsertBefore;
+ if (auto *PHI = dyn_cast<PHINode>(User))
+ InsertBefore = PHI->getIncomingBlock(U)->getTerminator();
+ else
+ InsertBefore = User;
+
+ IRBuilder<> IRBUser(InsertBefore);
+ Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8*
+ ConstantInt::get(Int32Ty, -Offset));
+ Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
+
+ if (auto *PHI = dyn_cast<PHINode>(User)) {
+ // PHI nodes may have multiple incoming edges from the same BB; all of
+ // them must be updated at once with the same incoming value.
+ auto *BB = PHI->getIncomingBlock(U);
+ for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I)
+ if (PHI->getIncomingBlock(I) == BB)
+ PHI->setIncomingValue(I, Replacement);
+ } else {
+ U.set(Replacement);
+ }
+ }
+
+ AI->eraseFromParent();
+ }
+
+ // Re-align BasePointer so that our callees would see it aligned as
+ // expected.
+ // FIXME: no need to update BasePointer in leaf functions.
+ unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
+
+ // Update shadow stack pointer in the function epilogue.
+ IRB.SetInsertPoint(BasePointer->getNextNode());
+
+ Value *StaticTop =
+ IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+ "unsafe_stack_static_top");
+ IRB.CreateStore(StaticTop, UnsafeStackPtr);
+ return StaticTop;
+}
+
+void SafeStack::moveDynamicAllocasToUnsafeStack(
+ Function &F, Value *UnsafeStackPtr, AllocaInst *DynamicTop,
+ ArrayRef<AllocaInst *> DynamicAllocas) {
+ DIBuilder DIB(*F.getParent());
+
+ for (AllocaInst *AI : DynamicAllocas) {
+ IRBuilder<> IRB(AI);
+
+ // Compute the new SP value (after AI).
+ Value *ArraySize = AI->getArraySize();
+ if (ArraySize->getType() != IntPtrTy)
+ ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
+
+ Type *Ty = AI->getAllocatedType();
+ uint64_t TySize = DL->getTypeAllocSize(Ty);
+ Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
+
+ Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+ SP = IRB.CreateSub(SP, Size);
+
+ // Align the SP value to satisfy the AllocaInst, type and stack alignments.
+ unsigned Align = std::max(
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+ (unsigned)StackAlignment);
+
+ assert(isPowerOf2_32(Align));
+ Value *NewTop = IRB.CreateIntToPtr(
+ IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+ StackPtrTy);
+
+ // Save the stack pointer.
+ IRB.CreateStore(NewTop, UnsafeStackPtr);
+ if (DynamicTop)
+ IRB.CreateStore(NewTop, DynamicTop);
+
+ Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType());
+ if (AI->hasName() && isa<Instruction>(NewAI))
+ NewAI->takeName(AI);
+
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+ AI->replaceAllUsesWith(NewAI);
+ AI->eraseFromParent();
+ }
+
+ if (!DynamicAllocas.empty()) {
+ // Now go through the instructions again, replacing stacksave/stackrestore.
+ for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
+ Instruction *I = &*(It++);
+ auto II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ continue;
+
+ if (II->getIntrinsicID() == Intrinsic::stacksave) {
+ IRBuilder<> IRB(II);
+ Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+ LI->takeName(II);
+ II->replaceAllUsesWith(LI);
+ II->eraseFromParent();
+ } else if (II->getIntrinsicID() == Intrinsic::stackrestore) {
+ IRBuilder<> IRB(II);
+ Instruction *SI = IRB.CreateStore(II->getArgOperand(0), UnsafeStackPtr);
+ SI->takeName(II);
+ assert(II->use_empty());
+ II->eraseFromParent();
+ }
+ }
+ }
+}
+
+bool SafeStack::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return false;
+ }
+
+ if (F.isDeclaration()) {
+ DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return false;
+ }
+
+ TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ ++NumFunctions;
+
+ SmallVector<AllocaInst *, 16> StaticAllocas;
+ SmallVector<AllocaInst *, 4> DynamicAllocas;
+ SmallVector<Argument *, 4> ByValArguments;
+ SmallVector<ReturnInst *, 4> Returns;
+
+ // Collect all points where the stack gets unwound and needs to be restored.
+ // This is only necessary because the runtime (setjmp and unwind code) is
+ // not aware of the unsafe stack and won't unwind/restore it properly.
+ // To work around this problem without changing the runtime, we insert
+ // instrumentation to restore the unsafe stack pointer when necessary.
+ SmallVector<Instruction *, 4> StackRestorePoints;
+
+ // Find all static and dynamic alloca instructions that must be moved to the
+ // unsafe stack, all return instructions and stack restore points.
+ findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns,
+ StackRestorePoints);
+
+ if (StaticAllocas.empty() && DynamicAllocas.empty() &&
+ ByValArguments.empty() && StackRestorePoints.empty())
+ return false; // Nothing to do in this function.
+
+ if (!StaticAllocas.empty() || !DynamicAllocas.empty() ||
+ !ByValArguments.empty())
+ ++NumUnsafeStackFunctions; // This function has the unsafe stack.
+
+ if (!StackRestorePoints.empty())
+ ++NumUnsafeStackRestorePointsFunctions;
+
+ IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
+ UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F);
+
+ // Load the current stack pointer (we'll also use it as a base pointer).
+ // FIXME: use a dedicated register for it?
+ Instruction *BasePointer =
+ IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+ assert(BasePointer->getType() == StackPtrTy);
+
+ AllocaInst *StackGuardSlot = nullptr;
+ // FIXME: implement weaker forms of stack protector.
+ if (F.hasFnAttribute(Attribute::StackProtect) ||
+ F.hasFnAttribute(Attribute::StackProtectStrong) ||
+ F.hasFnAttribute(Attribute::StackProtectReq)) {
+ Value *StackGuard = getStackGuard(IRB, F);
+ StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr);
+ IRB.CreateStore(StackGuard, StackGuardSlot);
+
+ for (ReturnInst *RI : Returns) {
+ IRBuilder<> IRBRet(RI);
+ checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard);
+ }
+ }
+
+ // The top of the unsafe stack after all unsafe static allocas are
+ // allocated.
+ Value *StaticTop =
+ moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, ByValArguments,
+ Returns, BasePointer, StackGuardSlot);
+
+ // Safe stack object that stores the current unsafe stack top. It is updated
+ // as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
+ // This is only needed if we need to restore stack pointer after longjmp
+ // or exceptions, and we have dynamic allocations.
+ // FIXME: a better alternative might be to store the unsafe stack pointer
+ // before setjmp / invoke instructions.
+ AllocaInst *DynamicTop = createStackRestorePoints(
+ IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty());
+
+ // Handle dynamic allocas.
+ moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop,
+ DynamicAllocas);
+
+ // Restore the unsafe stack pointer before each return.
+ for (ReturnInst *RI : Returns) {
+ IRB.SetInsertPoint(RI);
+ IRB.CreateStore(BasePointer, UnsafeStackPtr);
+ }
+
+ DEBUG(dbgs() << "[SafeStack] safestack applied\n");
+ return true;
+}
+
+} // anonymous namespace
+
+char SafeStack::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
+ "Safe Stack instrumentation pass", false, false)
+
+FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
+ return new SafeStack(TM);
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
new file mode 100644
index 000000000000..709614f57e7d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -0,0 +1,289 @@
+//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackColoring.h"
+
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestackcoloring"
+
+static cl::opt<bool> ClColoring("safe-stack-coloring",
+ cl::desc("enable safe stack coloring"),
+ cl::Hidden, cl::init(true));
+
+const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
+ return LiveRanges[AllocaNumbering[AI]];
+}
+
+bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end))
+ return false;
+
+ *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return true;
+}
+
+void StackColoring::removeAllMarkers() {
+ for (auto *I : Markers) {
+ auto *Op = dyn_cast<Instruction>(I->getOperand(1));
+ I->eraseFromParent();
+ // Remove the operand bitcast, too, if it has no more uses left.
+ if (Op && Op->use_empty())
+ Op->eraseFromParent();
+ }
+}
+
+void StackColoring::collectMarkers() {
+ InterestingAllocas.resize(NumAllocas);
+ DenseMap<BasicBlock *, SmallDenseMap<Instruction *, Marker>> BBMarkerSet;
+
+ // Compute the set of start/end markers per basic block.
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
+ AllocaInst *AI = Allocas[AllocaNo];
+ SmallVector<Instruction *, 8> WorkList;
+ WorkList.push_back(AI);
+ while (!WorkList.empty()) {
+ Instruction *I = WorkList.pop_back_val();
+ for (User *U : I->users()) {
+ if (auto *BI = dyn_cast<BitCastInst>(U)) {
+ WorkList.push_back(BI);
+ continue;
+ }
+ auto *UI = dyn_cast<Instruction>(U);
+ if (!UI)
+ continue;
+ bool IsStart;
+ if (!readMarker(UI, &IsStart))
+ continue;
+ if (IsStart)
+ InterestingAllocas.set(AllocaNo);
+ BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart};
+ Markers.push_back(UI);
+ }
+ }
+ }
+
+ // Compute instruction numbering. Only the following instructions are
+ // considered:
+ // * Basic block entries
+ // * Lifetime markers
+ // For each basic block, compute
+ // * the list of markers in the instruction order
+ // * the sets of allocas whose lifetime starts or ends in this BB
+ DEBUG(dbgs() << "Instructions:\n");
+ unsigned InstNo = 0;
+ for (BasicBlock *BB : depth_first(&F)) {
+ DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n");
+ unsigned BBStart = InstNo++;
+
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
+ BlockInfo.Begin.resize(NumAllocas);
+ BlockInfo.End.resize(NumAllocas);
+ BlockInfo.LiveIn.resize(NumAllocas);
+ BlockInfo.LiveOut.resize(NumAllocas);
+
+ auto &BlockMarkerSet = BBMarkerSet[BB];
+ if (BlockMarkerSet.empty()) {
+ unsigned BBEnd = InstNo;
+ BlockInstRange[BB] = std::make_pair(BBStart, BBEnd);
+ continue;
+ }
+
+ auto ProcessMarker = [&](Instruction *I, const Marker &M) {
+ DEBUG(dbgs() << " " << InstNo << ": "
+ << (M.IsStart ? "start " : "end ") << M.AllocaNo << ", "
+ << *I << "\n");
+
+ BBMarkers[BB].push_back({InstNo, M});
+
+ InstructionNumbering[I] = InstNo++;
+
+ if (M.IsStart) {
+ if (BlockInfo.End.test(M.AllocaNo))
+ BlockInfo.End.reset(M.AllocaNo);
+ BlockInfo.Begin.set(M.AllocaNo);
+ } else {
+ if (BlockInfo.Begin.test(M.AllocaNo))
+ BlockInfo.Begin.reset(M.AllocaNo);
+ BlockInfo.End.set(M.AllocaNo);
+ }
+ };
+
+ if (BlockMarkerSet.size() == 1) {
+ ProcessMarker(BlockMarkerSet.begin()->getFirst(),
+ BlockMarkerSet.begin()->getSecond());
+ } else {
+ // Scan the BB to determine the marker order.
+ for (Instruction &I : *BB) {
+ auto It = BlockMarkerSet.find(&I);
+ if (It == BlockMarkerSet.end())
+ continue;
+ ProcessMarker(&I, It->getSecond());
+ }
+ }
+
+ unsigned BBEnd = InstNo;
+ BlockInstRange[BB] = std::make_pair(BBStart, BBEnd);
+ }
+ NumInst = InstNo;
+}
+
+void StackColoring::calculateLocalLiveness() {
+ bool changed = true;
+ while (changed) {
+ changed = false;
+
+ for (BasicBlock *BB : depth_first(&F)) {
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
+
+ // Compute LiveIn by unioning together the LiveOut sets of all preds.
+ BitVector LocalLiveIn;
+ for (auto *PredBB : predecessors(BB)) {
+ LivenessMap::const_iterator I = BlockLiveness.find(PredBB);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+
+ // Compute LiveOut by subtracting out lifetimes that end in this
+ // block, then adding in lifetimes that begin in this block. If
+ // we have both BEGIN and END markers in the same basic block
+ // then we know that the BEGIN marker comes after the END,
+ // because we already handle the case where the BEGIN comes
+ // before the END when collecting the markers (and building the
+ // BEGIN/END vectors).
+ BitVector LocalLiveOut = LocalLiveIn;
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveOut |= BlockInfo.Begin;
+
+ // Update block LiveIn set, noting whether it has changed.
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+ changed = true;
+ BlockInfo.LiveIn |= LocalLiveIn;
+ }
+
+ // Update block LiveOut set, noting whether it has changed.
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+ changed = true;
+ BlockInfo.LiveOut |= LocalLiveOut;
+ }
+ }
+ } // while changed.
+}
+
+void StackColoring::calculateLiveIntervals() {
+ for (auto IT : BlockLiveness) {
+ BasicBlock *BB = IT.getFirst();
+ BlockLifetimeInfo &BlockInfo = IT.getSecond();
+ unsigned BBStart, BBEnd;
+ std::tie(BBStart, BBEnd) = BlockInstRange[BB];
+
+ BitVector Started, Ended;
+ Started.resize(NumAllocas);
+ Ended.resize(NumAllocas);
+ SmallVector<unsigned, 8> Start;
+ Start.resize(NumAllocas);
+
+ // LiveIn ranges start at the first instruction.
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
+ if (BlockInfo.LiveIn.test(AllocaNo)) {
+ Started.set(AllocaNo);
+ Start[AllocaNo] = BBStart;
+ }
+ }
+
+ for (auto &It : BBMarkers[BB]) {
+ unsigned InstNo = It.first;
+ bool IsStart = It.second.IsStart;
+ unsigned AllocaNo = It.second.AllocaNo;
+
+ if (IsStart) {
+ assert(!Started.test(AllocaNo));
+ Started.set(AllocaNo);
+ Ended.reset(AllocaNo);
+ Start[AllocaNo] = InstNo;
+ } else {
+ assert(!Ended.test(AllocaNo));
+ if (Started.test(AllocaNo)) {
+ LiveRanges[AllocaNo].AddRange(Start[AllocaNo], InstNo);
+ Started.reset(AllocaNo);
+ }
+ Ended.set(AllocaNo);
+ }
+ }
+
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
+ if (Started.test(AllocaNo))
+ LiveRanges[AllocaNo].AddRange(Start[AllocaNo], BBEnd);
+ }
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpAllocas() {
+ dbgs() << "Allocas:\n";
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
+ dbgs() << " " << AllocaNo << ": " << *Allocas[AllocaNo] << "\n";
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpBlockLiveness() {
+ dbgs() << "Block liveness:\n";
+ for (auto IT : BlockLiveness) {
+ BasicBlock *BB = IT.getFirst();
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[BB];
+ auto BlockRange = BlockInstRange[BB];
+ dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second
+ << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End
+ << ", livein " << BlockInfo.LiveIn << ", liveout "
+ << BlockInfo.LiveOut << "\n";
+ }
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
+ dbgs() << "Alloca liveness:\n";
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) {
+ LiveRange &Range = LiveRanges[AllocaNo];
+ dbgs() << " " << AllocaNo << ": " << Range << "\n";
+ }
+}
+
+void StackColoring::run() {
+ DEBUG(dumpAllocas());
+
+ for (unsigned I = 0; I < NumAllocas; ++I)
+ AllocaNumbering[Allocas[I]] = I;
+ LiveRanges.resize(NumAllocas);
+
+ collectMarkers();
+
+ if (!ClColoring) {
+ for (auto &R : LiveRanges) {
+ R.SetMaximum(1);
+ R.AddRange(0, 1);
+ }
+ return;
+ }
+
+ for (auto &R : LiveRanges)
+ R.SetMaximum(NumInst);
+ for (unsigned I = 0; I < NumAllocas; ++I)
+ if (!InterestingAllocas.test(I))
+ LiveRanges[I] = getFullLiveRange();
+
+ calculateLocalLiveness();
+ DEBUG(dumpBlockLiveness());
+ calculateLiveIntervals();
+ DEBUG(dumpLiveRanges());
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.h b/contrib/llvm/lib/CodeGen/SafeStackColoring.h
new file mode 100644
index 000000000000..08b179ccb7f1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.h
@@ -0,0 +1,149 @@
+//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
+#define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_os_ostream.h"
+
+namespace llvm {
+class AllocaInst;
+
+namespace safestack {
+/// Compute live ranges of allocas.
+/// Live ranges are represented as sets of "interesting" instructions, which are
+/// defined as instructions that may start or end an alloca's lifetime. These
+/// are:
+/// * lifetime.start and lifetime.end intrinsics
+/// * first instruction of any basic block
+/// Interesting instructions are numbered in the depth-first walk of the CFG,
+/// and in the program order inside each basic block.
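+/// For example, given two blocks BB0 and BB1 visited in that order, with a
+/// lifetime.start of %a in BB0 and the matching lifetime.end in BB1, the
+/// numbering is 0 = entry of BB0, 1 = the start marker, 2 = entry of BB1,
+/// 3 = the end marker; %a's live range is then the set {1, 2}, i.e. from the
+/// start marker up to, but not including, the end marker.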
+class StackColoring {
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+ /// Which slots BEGIN in each basic block.
+ BitVector Begin;
+ /// Which slots END in each basic block.
+ BitVector End;
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+public:
+ /// This class represents a set of interesting instructions where an alloca is
+ /// live.
+ struct LiveRange {
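+ // Each set bit is the number of an "interesting" instruction at which the
+ // alloca may be live.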
+ BitVector bv;
+ void SetMaximum(int size) { bv.resize(size); }
+ void AddRange(unsigned start, unsigned end) { bv.set(start, end); }
+ bool Overlaps(const LiveRange &Other) const {
+ return bv.anyCommon(Other.bv);
+ }
+ void Join(const LiveRange &Other) { bv |= Other.bv; }
+ };
+
+private:
+ Function &F;
+
+ /// Maps active slots (per bit) for each basic block.
+ typedef DenseMap<BasicBlock *, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
+
+ /// Number of interesting instructions.
+ int NumInst;
+ /// Numeric ids for interesting instructions.
+ DenseMap<Instruction *, unsigned> InstructionNumbering;
+ /// A range [Start, End) of instruction ids for each basic block.
+ /// Instructions inside each BB have monotonic and consecutive ids.
+ DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
+
+ ArrayRef<AllocaInst *> Allocas;
+ unsigned NumAllocas;
+ DenseMap<AllocaInst *, unsigned> AllocaNumbering;
+ /// LiveRange for allocas.
+ SmallVector<LiveRange, 8> LiveRanges;
+
+ /// The set of allocas that have at least one lifetime.start. All other
+ /// allocas get LiveRange that corresponds to the entire function.
+ BitVector InterestingAllocas;
+ SmallVector<Instruction *, 8> Markers;
+
+ struct Marker {
+ unsigned AllocaNo;
+ bool IsStart;
+ };
+
+ /// List of {InstNo, {AllocaNo, IsStart}} for each BB, ordered by InstNo.
+ DenseMap<BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>> BBMarkers;
+
+ void dumpAllocas();
+ void dumpBlockLiveness();
+ void dumpLiveRanges();
+
+ bool readMarker(Instruction *I, bool *IsStart);
+ void collectMarkers();
+ void calculateLocalLiveness();
+ void calculateLiveIntervals();
+
+public:
+ StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas)
+ : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {}
+
+ void run();
+ void removeAllMarkers();
+
+ /// Returns a set of "interesting" instructions where the given alloca is
+ /// live. Not all instructions in a function are interesting: we pick a set
+ /// that is large enough for LiveRange::Overlaps to be correct.
+ const LiveRange &getLiveRange(AllocaInst *AI);
+
+ /// Returns a live range that represents an alloca that is live throughout the
+ /// entire function.
+ LiveRange getFullLiveRange() {
+ assert(NumInst >= 0);
+ LiveRange R;
+ R.SetMaximum(NumInst);
+ R.AddRange(0, NumInst);
+ return R;
+ }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) {
+ OS << "{";
+ int idx = V.find_first();
+ bool first = true;
+ while (idx >= 0) {
+ if (!first) {
+ OS << ", ";
+ }
+ first = false;
+ OS << idx;
+ idx = V.find_next(idx);
+ }
+ OS << "}";
+ return OS;
+}
+
+static inline raw_ostream &operator<<(raw_ostream &OS,
+ const StackColoring::LiveRange &R) {
+ return OS << R.bv;
+}
+
+} // namespace safestack
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
new file mode 100644
index 000000000000..b8190e0f2153
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -0,0 +1,138 @@
+//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SafeStackLayout.h"
+
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::safestack;
+
+#define DEBUG_TYPE "safestacklayout"
+
+static cl::opt<bool> ClLayout("safe-stack-layout",
+ cl::desc("enable safe stack layout"), cl::Hidden,
+ cl::init(true));
+
+LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
+ OS << "Stack regions:\n";
+ for (unsigned i = 0; i < Regions.size(); ++i) {
+ OS << " " << i << ": [" << Regions[i].Start << ", " << Regions[i].End
+ << "), range " << Regions[i].Range << "\n";
+ }
+ OS << "Stack objects:\n";
+ for (auto &IT : ObjectOffsets) {
+ OS << " at " << IT.getSecond() << ": " << *IT.getFirst() << "\n";
+ }
+}
+
+void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
+ const StackColoring::LiveRange &Range) {
+ StackObjects.push_back({V, Size, Alignment, Range});
+ MaxAlignment = std::max(MaxAlignment, Alignment);
+}
+
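+// Returns the smallest Start >= Offset such that Start + Size is a multiple
+// of Alignment. Since an object with layout end offset E is addressed as
+// BasePointer - E, aligning the end offset aligns the object itself; e.g.
+// Offset = 20, Size = 8, Alignment = 16 gives Start = 24 and End = 32.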
+static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
+ unsigned Alignment) {
+ return alignTo(Offset + Size, Alignment) - Size;
+}
+
+void StackLayout::layoutObject(StackObject &Obj) {
+ if (!ClLayout) {
+ // If layout is disabled, just grab the next aligned address.
+ // This effectively disables stack coloring as well.
+ unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End;
+ unsigned Start = AdjustStackOffset(LastRegionEnd, Obj.Size, Obj.Alignment);
+ unsigned End = Start + Obj.Size;
+ Regions.emplace_back(Start, End, Obj.Range);
+ ObjectOffsets[Obj.Handle] = End;
+ return;
+ }
+
+ DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " << Obj.Alignment
+ << ", range " << Obj.Range << "\n");
+ assert(Obj.Alignment <= MaxAlignment);
+ unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment);
+ unsigned End = Start + Obj.Size;
+ DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n");
+ for (const StackRegion &R : Regions) {
+ DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End
+ << ", range " << R.Range << "\n");
+ assert(End >= R.Start);
+ if (Start >= R.End) {
+ DEBUG(dbgs() << " Does not intersect, skip.\n");
+ continue;
+ }
+ if (Obj.Range.Overlaps(R.Range)) {
+ // Find the next appropriate location.
+ Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment);
+ End = Start + Obj.Size;
+ DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. " << End
+ << "\n");
+ continue;
+ }
+ if (End <= R.End) {
+ DEBUG(dbgs() << " Reusing region(s).\n");
+ break;
+ }
+ }
+
+ unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End;
+ if (End > LastRegionEnd) {
+ // Insert a new region at the end. Maybe two.
+ if (Start > LastRegionEnd) {
+ DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. "
+ << Start << "\n");
+ Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange());
+ LastRegionEnd = Start;
+ }
+ DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. " << End
+ << ", range " << Obj.Range << "\n");
+ Regions.emplace_back(LastRegionEnd, End, Obj.Range);
+ LastRegionEnd = End;
+ }
+
+ // Split starting and ending regions if necessary.
+ for (StackRegion &R : Regions) {
+ if (Start > R.Start && Start < R.End) {
+ StackRegion R0 = R;
+ R.Start = R0.End = Start;
+ Regions.insert(&R, R0);
+ continue;
+ }
+ if (End > R.Start && End < R.End) {
+ StackRegion R0 = R;
+ R0.End = R.Start = End;
+ Regions.insert(&R, R0);
+ break;
+ }
+ }
+
+ // Update live ranges for all affected regions.
+ for (StackRegion &R : Regions) {
+ if (Start < R.End && End > R.Start)
+ R.Range.Join(Obj.Range);
+ if (End <= R.End)
+ break;
+ }
+
+ ObjectOffsets[Obj.Handle] = End;
+}
+
+void StackLayout::computeLayout() {
+ // Simple greedy algorithm.
+ // If this is replaced with something smarter, it must preserve the property
+ // that the first object is always at the offset 0 in the stack frame (for
+ // StackProtectorSlot), or handle stack protector in some other way.
+ for (auto &Obj : StackObjects)
+ layoutObject(Obj);
+
+ DEBUG(print(dbgs()));
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm/lib/CodeGen/SafeStackLayout.h
new file mode 100644
index 000000000000..313ed21c8869
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.h
@@ -0,0 +1,68 @@
+//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
+#define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
+
+#include "SafeStackColoring.h"
+
+namespace llvm {
+namespace safestack {
+
+/// Compute the layout of an unsafe stack frame.
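+/// Typical use (see SafeStack.cpp): addObject() is called once per stack
+/// object, computeLayout() packs objects with non-overlapping live ranges
+/// into the same stack space, and getObjectOffset() / getFrameSize() /
+/// getFrameAlignment() are then used to materialize addresses relative to
+/// the unsafe stack base pointer.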
+class StackLayout {
+ unsigned MaxAlignment;
+
+ struct StackRegion {
+ unsigned Start;
+ unsigned End;
+ StackColoring::LiveRange Range;
+ StackRegion(unsigned Start, unsigned End,
+ const StackColoring::LiveRange &Range)
+ : Start(Start), End(End), Range(Range) {}
+ };
+ /// The list of current stack regions, sorted by StackRegion::Start.
+ SmallVector<StackRegion, 16> Regions;
+
+ struct StackObject {
+ const Value *Handle;
+ unsigned Size, Alignment;
+ StackColoring::LiveRange Range;
+ };
+ SmallVector<StackObject, 8> StackObjects;
+
+ DenseMap<const Value *, unsigned> ObjectOffsets;
+
+ void layoutObject(StackObject &Obj);
+
+public:
+ StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+ /// Add an object to the stack frame. Value pointer is opaque and used as a
+ /// handle to retrieve the object's offset in the frame later.
+ void addObject(const Value *V, unsigned Size, unsigned Alignment,
+ const StackColoring::LiveRange &Range);
+
+ /// Run the layout computation for all previously added objects.
+ void computeLayout();
+
+ /// Returns the offset to the object start in the stack frame.
+ unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
+
+ /// Returns the size of the entire frame.
+ unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
+
+ /// Returns the alignment of the frame.
+ unsigned getFrameAlignment() { return MaxAlignment; }
+ void print(raw_ostream &OS);
+};
+
+} // namespace safestack
+} // namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 000000000000..efde61ece639
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,641 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+#ifndef NDEBUG
+static cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
+void SchedulingPriorityQueue::anchor() { }
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+ : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
+ TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
+ MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
+ SUnits.clear();
+ EntrySU = SUnit();
+ ExitSU = SUnit();
+}
+
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return nullptr;
+ return &TII->get(Node->getMachineOpcode());
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// it is not already present. It also adds the current node as a successor of
+/// the specified node.
+bool SUnit::addPred(const SDep &D, bool Required) {
+ // If this node already has this dependence, don't add a redundant one.
+ for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ // Zero-latency weak edges may be added purely for heuristic ordering. Don't
+ // add them if another kind of edge already exists.
+ if (!Required && I->getSUnit() == D.getSUnit())
+ return false;
+ if (I->overlaps(D)) {
+ // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
+ if (I->getLatency() < D.getLatency()) {
+ SUnit *PredSU = I->getSUnit();
+ // Find the corresponding successor in N.
+ SDep ForwardD = *I;
+ ForwardD.setSUnit(this);
+ for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(),
+ EE = PredSU->Succs.end(); II != EE; ++II) {
+ if (*II == ForwardD) {
+ II->setLatency(D.getLatency());
+ break;
+ }
+ }
+ I->setLatency(D.getLatency());
+ }
+ return false;
+ }
+ }
+ // Now add a corresponding succ to N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ // Update the bookkeeping.
+ if (D.getKind() == SDep::Data) {
+ assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+ assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ ++NumPreds;
+ ++N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak()) {
+ ++WeakPredsLeft;
+ }
+ else {
+ assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ ++NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak()) {
+ ++N->WeakSuccsLeft;
+ }
+ else {
+ assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++N->NumSuccsLeft;
+ }
+ }
+ Preds.push_back(D);
+ N->Succs.push_back(P);
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return true;
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+ // Find the matching predecessor.
+ for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I)
+ if (*I == D) {
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
+ N->Succs.end(), P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
+ }
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
+ return;
+ }
+}
+
+void SUnit::setDepthDirty() {
+ if (!isDepthCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isDepthCurrent = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(),
+ E = SU->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isDepthCurrent)
+ WorkList.push_back(SuccSU);
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::setHeightDirty() {
+ if (!isHeightCurrent) return;
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *SU = WorkList.pop_back_val();
+ SU->isHeightCurrent = false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),
+ E = SU->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isHeightCurrent)
+ WorkList.push_back(PredSU);
+ }
+ } while (!WorkList.empty());
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+ if (NewDepth <= getDepth())
+ return;
+ setDepthDirty();
+ Depth = NewDepth;
+ isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+ if (NewHeight <= getHeight())
+ return;
+ setHeightDirty();
+ Height = NewHeight;
+ isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the length of the maximal path from a node with
+/// no predecessors (the entry) to this node.
+///
+void SUnit::ComputeDepth() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxPredDepth = 0;
+ for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+ E = Cur->Preds.end(); I != E; ++I) {
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->isDepthCurrent)
+ MaxPredDepth = std::max(MaxPredDepth,
+ PredSU->Depth + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(PredSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxPredDepth != Cur->Depth) {
+ Cur->setDepthDirty();
+ Cur->Depth = MaxPredDepth;
+ }
+ Cur->isDepthCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+/// ComputeHeight - Calculate the length of the maximal path from this node to
+/// a node with no successors (the exit).
+///
+void SUnit::ComputeHeight() {
+ SmallVector<SUnit*, 8> WorkList;
+ WorkList.push_back(this);
+ do {
+ SUnit *Cur = WorkList.back();
+
+ bool Done = true;
+ unsigned MaxSuccHeight = 0;
+ for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+ E = Cur->Succs.end(); I != E; ++I) {
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isHeightCurrent)
+ MaxSuccHeight = std::max(MaxSuccHeight,
+ SuccSU->Height + I->getLatency());
+ else {
+ Done = false;
+ WorkList.push_back(SuccSU);
+ }
+ }
+
+ if (Done) {
+ WorkList.pop_back();
+ if (MaxSuccHeight != Cur->Height) {
+ Cur->setHeightDirty();
+ Cur->Height = MaxSuccHeight;
+ }
+ Cur->isHeightCurrent = true;
+ }
+ } while (!WorkList.empty());
+}
+
+void SUnit::biasCriticalPath() {
+ if (NumPreds < 2)
+ return;
+
+ SUnit::pred_iterator BestI = Preds.begin();
+ unsigned MaxDepth = BestI->getSUnit()->getDepth();
+ for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
+ ++I) {
+ if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
+ BestI = I;
+ }
+ if (BestI != Preds.begin())
+ std::swap(*Preds.begin(), *BestI);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+ dbgs() << "SU(" << NodeNum << "): ";
+ G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+ dump(G);
+
+ dbgs() << " # preds left : " << NumPredsLeft << "\n";
+ dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ if (WeakPredsLeft)
+ dbgs() << " # weak preds left : " << WeakPredsLeft << "\n";
+ if (WeakSuccsLeft)
+ dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
+ dbgs() << " Latency : " << Latency << "\n";
+ dbgs() << " Depth : " << getDepth() << "\n";
+ dbgs() << " Height : " << getHeight() << "\n";
+
+ if (Preds.size() != 0) {
+ dbgs() << " Predecessors:\n";
+ for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+ if (Succs.size() != 0) {
+ dbgs() << " Successors:\n";
+ for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+ I != E; ++I) {
+ dbgs() << " ";
+ switch (I->getKind()) {
+ case SDep::Data: dbgs() << "val "; break;
+ case SDep::Anti: dbgs() << "anti"; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ch "; break;
+ }
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ if (I->isArtificial())
+ dbgs() << " *";
+ dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << "\n";
+ }
+ }
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
+///
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
+ bool AnyNotSched = false;
+ unsigned DeadNodes = 0;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ if (!SUnits[i].isScheduled) {
+ if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+ ++DeadNodes;
+ continue;
+ }
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has not been scheduled!\n";
+ AnyNotSched = true;
+ }
+ if (SUnits[i].isScheduled &&
+ (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
+ unsigned(INT_MAX)) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has an unexpected "
+ << (isBottomUp ? "Height" : "Depth") << " value!\n";
+ AnyNotSched = true;
+ }
+ if (isBottomUp) {
+ if (SUnits[i].NumSuccsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has successors left!\n";
+ AnyNotSched = true;
+ }
+ } else {
+ if (SUnits[i].NumPredsLeft != 0) {
+ if (!AnyNotSched)
+ dbgs() << "*** Scheduling failed! ***\n";
+ SUnits[i].dump(this);
+ dbgs() << "has predecessors left!\n";
+ AnyNotSched = true;
+ }
+ }
+ }
+ assert(!AnyNotSched);
+ return SUnits.size() - DeadNodes;
+}
+#endif
+
+/// InitDAGTopologicalSorting - create the initial topological
+/// ordering from the DAG to be scheduled.
+///
+/// The idea of the algorithm is taken from
+/// "Online algorithms for managing the topological order of
+/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly.
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// "Maintaining a topological order under edge insertions".
+///
+/// Short description of the algorithm:
+///
+/// Topological ordering, ord, of a DAG maps each node to a topological
+/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+///
+/// This means that if there is a path from the node X to the node Z,
+/// then ord(X) < ord(Z).
+///
+/// This property can be used to check for reachability of nodes:
+/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// create a cycle.
+///
+/// The algorithm first computes a topological ordering for the DAG by
+/// initializing the Index2Node and Node2Index arrays and then tries to keep
+/// the ordering up-to-date after edge insertions by reordering the DAG.
+///
+/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
+/// the nodes reachable from Y, and then shifts them using Shift to lie
+/// immediately after X in Index2Node.
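+///
+/// For example, if ord(Y) = 0 and ord(X) = 1 and the new edge X->Y is
+/// inserted, the DFS from Y marks only Y (assuming Y has no successors in
+/// the affected index range), and Shift then renumbers the nodes so that
+/// ord(X) = 0 and ord(Y) = 1, restoring ord(X) < ord(Y). Had X been
+/// reachable from Y, the DFS would instead have found a node with index
+/// equal to ord(X) and reported a cycle.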
+void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+ WorkList.reserve(DAGSize);
+
+ Index2Node.resize(DAGSize);
+ Node2Index.resize(DAGSize);
+
+ // Initialize the data structures.
+ if (ExitSU)
+ WorkList.push_back(ExitSU);
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ // Temporarily use the Node2Index array as scratch space for degree counts.
+ Node2Index[NodeNum] = Degree;
+
+ // Is it a node without dependencies?
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ // Collect leaf nodes.
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize)
+ Allocate(SU->NodeNum, --Id);
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit *SU = I->getSUnit();
+ if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
+ // If all dependencies of the node are processed already,
+ // then the node can be computed now.
+ WorkList.push_back(SU);
+ }
+ }
+
+ Visited.resize(DAGSize);
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ "Wrong topological sorting");
+ }
+ }
+#endif
+}
+
+/// AddPred - Updates the topological ordering to accommodate an edge
+/// to be added from SUnit X to SUnit Y.
+void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[Y->NodeNum];
+ UpperBound = Node2Index[X->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(X) < Ord(Y) ?
+ if (LowerBound < UpperBound) {
+ // Update the topological order.
+ Visited.reset();
+ DFS(Y, UpperBound, HasLoop);
+ assert(!HasLoop && "Inserted edge creates a loop!");
+ // Recompute topological indexes.
+ Shift(Visited, LowerBound, UpperBound);
+ }
+}
+
+/// RemovePred - Updates the topological ordering to accommodate the removal
+/// of an edge from the specified node N to the current node M, i.e. N being
+/// removed from M's predecessor list.
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
+ // InitDAGTopologicalSorting();
+}
+
+/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
+/// all nodes affected by the edge insertion. These nodes will later get new
+/// topological indexes by means of the Shift method.
+void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
+ bool &HasLoop) {
+ std::vector<const SUnit*> WorkList;
+ WorkList.reserve(SUnits.size());
+
+ WorkList.push_back(SU);
+ do {
+ SU = WorkList.back();
+ WorkList.pop_back();
+ Visited.set(SU->NodeNum);
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (s >= Node2Index.size())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ HasLoop = true;
+ return;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ WorkList.push_back(SU->Succs[I].getSUnit());
+ }
+ }
+ } while (!WorkList.empty());
+}
+
+/// Shift - Renumber the nodes so that the topological ordering is
+/// preserved.
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+ int UpperBound) {
+ std::vector<int> L;
+ int shift = 0;
+ int i;
+
+ for (i = LowerBound; i <= UpperBound; ++i) {
+ // w is node at topological index i.
+ int w = Index2Node[i];
+ if (Visited.test(w)) {
+ // Unmark.
+ Visited.reset(w);
+ L.push_back(w);
+ shift = shift + 1;
+ } else {
+ Allocate(w, i - shift);
+ }
+ }
+
+ for (unsigned j = 0; j < L.size(); ++j) {
+ Allocate(L[j], i - shift);
+ i = i + 1;
+ }
+}
+
+
+/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
+/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
+bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
+ // Is SU reachable from TargetSU via successor edges?
+ if (IsReachable(SU, TargetSU))
+ return true;
+ for (SUnit::pred_iterator
+ I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
+ if (I->isAssignedRegDep() &&
+ IsReachable(SU, I->getSUnit()))
+ return true;
+ return false;
+}
+
+/// IsReachable - Checks if SU is reachable from TargetSU.
+bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
+ const SUnit *TargetSU) {
+ // If insertion of the edge SU->TargetSU would create a cycle
+ // then there is a path from TargetSU to SU.
+ int UpperBound, LowerBound;
+ LowerBound = Node2Index[TargetSU->NodeNum];
+ UpperBound = Node2Index[SU->NodeNum];
+ bool HasLoop = false;
+ // Is Ord(TargetSU) < Ord(SU) ?
+ if (LowerBound < UpperBound) {
+ Visited.reset();
+ // There may be a path from TargetSU to SU. Check for it.
+ DFS(TargetSU, UpperBound, HasLoop);
+ }
+ return HasLoop;
+}
+
+/// Allocate - assign the topological index to the node n.
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
+ Node2Index[n] = index;
+ Index2Node[index] = n;
+}
+
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
+ : SUnits(sunits), ExitSU(exitsu) {}
+
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 000000000000..22bfd4d1964c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,1711 @@
+//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGInstrs class, which implements re-scheduling
+// of MachineInstrs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable use of AA during MI DAG construction"));
+
+static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
+ cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
+
+// Note: the two options below might be used in tuning compile time vs
+// output quality. Setting HugeRegion so large that it will never be
+// reached means best-effort, but may be slow.
+
+// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
+// together hold this many SUs, a reduction of maps will be done.
+static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
+ cl::init(1000), cl::desc("The limit to use while constructing the DAG "
+ "prior to scheduling, at which point a trade-off "
+ "is made to avoid excessive compile time."));
+
+static cl::opt<unsigned> ReductionSize(
+ "dag-maps-reduction-size", cl::Hidden,
+ cl::desc("A huge scheduling region will have maps reduced by this many "
+ "nodes at a time. Defaults to HugeRegion / 2."));
+
+static unsigned getReductionSize() {
+ // Always reduce a huge region by half of its elements, except when the
+ // user sets this number explicitly.
+ if (ReductionSize.getNumOccurrences() == 0)
+ return HugeRegion / 2;
+ return ReductionSize;
+}
+
+static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ dbgs() << "{ ";
+ for (auto *su : L) {
+ dbgs() << "SU(" << su->NodeNum << ")";
+ if (su != L.back())
+ dbgs() << ", ";
+ }
+ dbgs() << "}\n";
+#endif
+}
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo *mli,
+ bool RemoveKillFlags)
+ : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
+ RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
+ TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
+ UnknownValue(UndefValue::get(
+ Type::getVoidTy(mf.getFunction()->getContext()))),
+ FirstDbgValue(nullptr) {
+ DbgValues.clear();
+
+ const TargetSubtargetInfo &ST = mf.getSubtarget();
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
+}
+
+/// getUnderlyingObjectFromInt - This is the function that does the work of
+/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
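+/// For example, for the integer expression (add (ptrtoint i8* %p to i64), 8)
+/// it returns %p, whose type is a pointer, so getUnderlyingObjects can then
+/// resolve it further.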
+static const Value *getUnderlyingObjectFromInt(const Value *V) {
+ do {
+ if (const Operator *U = dyn_cast<Operator>(V)) {
+ // If we find a ptrtoint, we can transfer control back to the
+ // regular getUnderlyingObjectFromInt.
+ if (U->getOpcode() == Instruction::PtrToInt)
+ return U->getOperand(0);
+ // If we find an add of a constant, a multiplied value, or a phi, it's
+ // likely that the other operand will lead us to the base
+ // object. We don't have to worry about the case where the
+ // object address is somehow being computed by the multiply,
+ // because our callers only care when the result is an
+ // identifiable object.
+ if (U->getOpcode() != Instruction::Add ||
+ (!isa<ConstantInt>(U->getOperand(1)) &&
+ Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+ !isa<PHINode>(U->getOperand(1))))
+ return V;
+ V = U->getOperand(0);
+ } else {
+ return V;
+ }
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
+ } while (1);
+}
+
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
+/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+static void getUnderlyingObjects(const Value *V,
+ SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL) {
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 4> Working(1, V);
+ do {
+ V = Working.pop_back_val();
+
+ SmallVector<Value *, 4> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
+
+ for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
+ I != IE; ++I) {
+ V = *I;
+ if (!Visited.insert(V).second)
+ continue;
+ if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+ const Value *O =
+ getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+ if (O->getType()->isPointerTy()) {
+ Working.push_back(O);
+ continue;
+ }
+ }
+ Objects.push_back(const_cast<Value *>(V));
+ }
+ } while (!Working.empty());
+}
+
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
+/// information and it can be tracked to a normal reference to a known
+/// object, return the Value for that object.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+ const MachineFrameInfo *MFI,
+ UnderlyingObjectsVector &Objects,
+ const DataLayout &DL) {
+ auto allMMOsOkay = [&]() {
+ for (const MachineMemOperand *MMO : MI->memoperands()) {
+ if (MMO->isVolatile())
+ return false;
+
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
+ // Functions that contain tail calls don't have unique PseudoSourceValue
+ // objects. Two PseudoSourceValues might refer to the same or
+ // overlapping locations. The client code calling this function assumes
+ // this is not the case. So return a conservative answer of no known
+ // object.
+ if (MFI->hasTailCall())
+ return false;
+
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ if (PSV->isAliased(MFI))
+ return false;
+
+ bool MayAlias = PSV->mayAlias(MFI);
+ Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
+ } else if (const Value *V = MMO->getValue()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs, DL);
+
+ for (Value *V : Objs) {
+ if (!isIdentifiedObject(V))
+ return false;
+
+ Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
+ }
+ } else
+ return false;
+ }
+ return true;
+ };
+
+ if (!allMMOsOkay())
+ Objects.clear();
+}
+
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+ BB = bb;
+}
+
+void ScheduleDAGInstrs::finishBlock() {
+ // Subclasses should no longer refer to the old block.
+ BB = nullptr;
+}
+
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned regioninstrs) {
+ assert(bb == BB && "startBlock should set BB");
+ RegionBegin = begin;
+ RegionEnd = end;
+ NumRegionInstrs = regioninstrs;
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and uses lists. This is because we want
+/// to make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// is too high to be hidden by the branch or when the liveout registers
+/// are used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->isCall() || ExitMI->isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // the instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(&ExitSU, i);
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ assert(Uses.empty() && "Uses in set before adding deps?");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (const auto &LI : (*SI)->liveins()) {
+ if (!Uses.contains(LI.PhysReg))
+ Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
+ }
+ }
+}
+
+/// MO is an operand of SU's instruction that defines a physical register. Add
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ if (!Uses.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
+ SUnit *UseSU = I->SU;
+ if (UseSU == SU)
+ continue;
+
+ // Adjust the dependence latency using operand def/use information,
+ // then allow the target to perform its own adjustments.
+ int UseOp = I->OpIdx;
+ MachineInstr *RegUse = nullptr;
+ SDep Dep;
+ if (UseOp < 0)
+ Dep = SDep(SU, SDep::Artificial);
+ else {
+ // Set the hasPhysRegDefs only for physreg defs that have a use within
+ // the scheduling region.
+ SU->hasPhysRegDefs = true;
+ Dep = SDep(SU, SDep::Data, *Alias);
+ RegUse = UseSU->getInstr();
+ }
+ Dep.setLatency(
+ SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
+ UseOp));
+
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
+ }
+ }
+}
+
+/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
+/// this SUnit to following instructions in the same scheduling region that
+/// depend on the physical register referenced at OperIdx.
+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+ Alias.isValid(); ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
+ SUnit *DefSU = I->SU;
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
+ else {
+ SDep Dep(SU, Kind, /*Reg=*/*Alias);
+ Dep.setLatency(
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ DefSU->addPred(Dep);
+ }
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ SU->hasPhysRegUses = true;
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
+ if (RemoveKillFlags)
+ MO.setIsKill(false);
+ }
+ else {
+ addPhysRegDataDeps(SU, OperIdx);
+ unsigned Reg = MO.getReg();
+
+ // clear this register's use list
+ if (Uses.contains(Reg))
+ Uses.eraseAll(Reg);
+
+ if (!MO.isDead()) {
+ Defs.eraseAll(Reg);
+ } else if (SU->isCall) {
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+ // to the DefList making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
+ Reg2SUnitsMap::iterator B = P.first;
+ Reg2SUnitsMap::iterator I = P.second;
+ for (bool isBegin = I == B; !isBegin; /* empty */) {
+ isBegin = (--I) == B;
+ if (!I->SU->isCall)
+ break;
+ I = Defs.erase(I);
+ }
+ }
+
+ // Defs are pushed in the order they are visited and never reordered.
+ Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
+ }
+}
+
+LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
+{
+ unsigned Reg = MO.getReg();
+ // No point in tracking lanemasks if we don't have interesting subregisters.
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
+ if (!RC.HasDisjunctSubRegs)
+ return ~0u;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0)
+ return RC.getLaneMask();
+ return TRI->getSubRegIndexLaneMask(SubReg);
+}
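+
+// Worked example for getLaneMaskForMO (register class and subregister names
+// are hypothetical): if a register class has two disjoint subregisters
+// sub_lo and sub_hi with lane masks 0x1 and 0x2, a full def of a vreg in
+// that class yields the class lane mask 0x3, while an operand with
+// SubReg == sub_hi yields 0x2, so only the high lanes are tracked for it.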
+
+/// addVRegDefDeps - Add register output and data dependencies from this SUnit
+/// to instructions that occur later in the same scheduling region if they read
+/// from or write to the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+
+ LaneBitmask DefLaneMask;
+ LaneBitmask KillLaneMask;
+ if (TrackLaneMasks) {
+ bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
+ DefLaneMask = getLaneMaskForMO(MO);
+ // If we have a <read-undef> flag, none of the lane values comes from an
+ // earlier instruction.
+ KillLaneMask = IsKill ? ~0u : DefLaneMask;
+
+ // Clear undef flag, we'll re-add it later once we know which subregister
+ // Def is first.
+ MO.setIsUndef(false);
+ } else {
+ DefLaneMask = ~0u;
+ KillLaneMask = ~0u;
+ }
+
+ if (MO.isDead()) {
+ assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
+ "Dead defs should have no uses");
+ } else {
+ // Add data dependence to all uses we found so far.
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
+ E = CurrentVRegUses.end(); I != E; /*empty*/) {
+ LaneBitmask LaneMask = I->LaneMask;
+ // Ignore uses of other lanes.
+ if ((LaneMask & KillLaneMask) == 0) {
+ ++I;
+ continue;
+ }
+
+ if ((LaneMask & DefLaneMask) != 0) {
+ SUnit *UseSU = I->SU;
+ MachineInstr *Use = UseSU->getInstr();
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
+ I->OperandIndex));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ UseSU->addPred(Dep);
+ }
+
+ LaneMask &= ~KillLaneMask;
+ // If we found a Def for all lanes of this use, remove it from the list.
+ if (LaneMask != 0) {
+ I->LaneMask = LaneMask;
+ ++I;
+ } else
+ I = CurrentVRegUses.erase(I);
+ }
+ }
+
+ // Shortcut: Singly defined vregs do not have output/anti dependencies.
+ if (MRI.hasOneDef(Reg))
+ return;
+
+ // Add output dependence to the next nearest defs of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ LaneBitmask LaneMask = DefLaneMask;
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for other lanes.
+ if ((V2SU.LaneMask & LaneMask) == 0)
+ continue;
+ // Add an output dependence.
+ SUnit *DefSU = V2SU.SU;
+ // Ignore additional defs of the same lanes in one instruction. This can
+ // happen because lanemasks are shared for targets with too many
+ // subregisters. We also use some representation tricks/hacks where we
+ // add super-register defs/uses, to imply that although we only access parts
+ // of the reg we care about the full one.
+ if (DefSU == SU)
+ continue;
+ SDep Dep(SU, SDep::Output, Reg);
+ Dep.setLatency(
+ SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
+ DefSU->addPred(Dep);
+
+ // Update current definition. This can get tricky if the def covered a
+ // bigger lanemask before. We then have to shrink it and create a new
+ // VReg2SUnit for the non-overlapping part.
+ LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
+ LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
+ V2SU.SU = SU;
+ V2SU.LaneMask = OverlapMask;
+ if (NonOverlapMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
+ }
+ // If there was no CurrentVRegDefs entry for some lanes yet, create one.
+ if (LaneMask != 0)
+ CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
+}
+
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+
+ // Remember the use. Data dependencies will be added when we find the def.
+ LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
+ CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
+
+ // Add antidependences to the following defs of the vreg.
+ for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
+ CurrentVRegDefs.end())) {
+ // Ignore defs for unrelated lanes.
+ LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
+ if ((PrevDefLaneMask & LaneMask) == 0)
+ continue;
+ if (V2SU.SU == SU)
+ continue;
+
+ V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
+ }
+}
+
+/// Return true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+ return MI->isCall() || MI->hasUnmodeledSideEffects() ||
+ (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA));
+}
+
+/// This returns true if the two MIs need a chain edge between them.
+/// This is called on normal stores and loads.
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ const DataLayout &DL, MachineInstr *MIa,
+ MachineInstr *MIb) {
+ const MachineFunction *MF = MIa->getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ assert ((MIa->mayStore() || MIb->mayStore()) &&
+ "Dependency checked between two loads");
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
+ return false;
+
+ // To this point analysis is generic. From here on we do need AA.
+ if (!AA)
+ return true;
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+ return true;
+
+ MachineMemOperand *MMOa = *MIa->memoperands_begin();
+ MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+ if (!MMOa->getValue() || !MMOb->getValue())
+ return true;
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+ assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
+
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MMOb->getValue(), Overlapb,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+
+ return (AAResult != NoAlias);
+}
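+
+// Worked example of the overlap arithmetic above (illustrative numbers):
+// with MMOa = {Offset = 8, Size = 4} and MMOb = {Offset = 0, Size = 16},
+// MinOffset is 0, so Overlapa = 4 + 8 - 0 = 12 and Overlapb = 16 + 0 - 0 = 16.
+// The AA query then compares the ranges starting at the two IR values with
+// sizes 12 and 16, which is enough to cover both accesses from their bases.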
+
+/// Check whether two objects need a chain edge and add it if needed.
+void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
+ unsigned Latency) {
+ if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
+ SUb->getInstr())) {
+ SDep Dep(SUa, SDep::MayAliasMem);
+ Dep.setLatency(Latency);
+ SUb->addPred(Dep);
+ }
+}
+
+/// Create an SUnit for each real instruction, numbered in top-down topological
+/// order. The instruction order A < B implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
+ SUnits.reserve(NumRegionInstrs);
+
+ for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) {
+ if (MI.isDebugValue())
+ continue;
+
+ SUnit *SU = newSUnit(&MI);
+ MISUnitMap[&MI] = SU;
+
+ SU->isCall = MI.isCall();
+ SU->isCommutable = MI.isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+
+ // If this SUnit uses a reserved or unbuffered resource, mark it as such.
+ //
+ // Reserved resources block an instruction from issuing and stall the
+ // entire pipeline. These are identified by BufferSize=0.
+ //
+ // Unbuffered resources prevent execution of subsequent instructions that
+ // require the same resources. This is used for in-order execution pipelines
+ // within an out-of-order core. These are identified by BufferSize=1.
+ if (SchedModel.hasInstrSchedModel()) {
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+ case 0:
+ SU->hasReservedResource = true;
+ break;
+ case 1:
+ SU->isUnbuffered = true;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
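+
+// For illustration (hypothetical machine model, not a real target): an
+// unpipelined divide unit declared with `let BufferSize = 0` on its
+// ProcResource would cause SUs writing it to get hasReservedResource set
+// here, while an in-order pipe modeled with `let BufferSize = 1` would mark
+// its SUs as isUnbuffered; both flags are then available to the scheduler's
+// strategy when it picks nodes.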
+
+void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
+ const MachineInstr *MI = SU->getInstr();
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (TrackLaneMasks && !MO.isUse())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // Ignore re-defs.
+ if (TrackLaneMasks) {
+ bool FoundDef = false;
+ for (const MachineOperand &MO2 : MI->operands()) {
+ if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ FoundDef = true;
+ break;
+ }
+ }
+ if (FoundDef)
+ continue;
+ }
+
+ // Record this local VReg use.
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, 0, SU));
+ }
+}
+
+class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
+
+ /// Current total number of SUs in map.
+ unsigned NumNodes;
+
+ /// 1 for loads, 0 for stores. (see comment in SUList)
+ unsigned TrueMemOrderLatency;
+public:
+
+ Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+
+ /// To keep NumNodes up to date, insert() is used instead of
+ /// this operator w/ push_back().
+ ValueType &operator[](const SUList &Key) {
+ llvm_unreachable("Don't use. Use insert() instead."); };
+
+ /// Add SU to the SUList of V. If Map grows huge, reduce its size
+ /// by calling reduce().
+ void inline insert(SUnit *SU, ValueType V) {
+ MapVector::operator[](V).push_back(SU);
+ NumNodes++;
+ }
+
+ /// Clears the list of SUs mapped to V.
+ void inline clearList(ValueType V) {
+ iterator Itr = find(V);
+ if (Itr != end()) {
+ assert (NumNodes >= Itr->second.size());
+ NumNodes -= Itr->second.size();
+
+ Itr->second.clear();
+ }
+ }
+
+ /// Clears map from all contents.
+ void clear() {
+ MapVector<ValueType, SUList>::clear();
+ NumNodes = 0;
+ }
+
+ unsigned inline size() const { return NumNodes; }
+
+ /// Count the number of SUs in this map after a reduction.
+ void reComputeSize(void) {
+ NumNodes = 0;
+ for (auto &I : *this)
+ NumNodes += I.second.size();
+ }
+
+ unsigned inline getTrueMemOrderLatency() const {
+ return TrueMemOrderLatency;
+ }
+
+ void dump();
+};
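+
+// Minimal usage sketch for Value2SUsMap (illustrative; mirrors how
+// buildSchedGraph below uses it):
+//   Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+//   Stores.insert(SU, V);            // append SU to the SUList of V
+//   Stores.clearList(V);             // forget all SUs recorded for V
+//   if (Stores.size() + Loads.size() >= HugeRegion)
+//     reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());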
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap) {
+ for (auto &I : Val2SUsMap)
+ addChainDependencies(SU, I.second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
+ Value2SUsMap &Val2SUsMap,
+ ValueType V) {
+ Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
+ if (Itr != Val2SUsMap.end())
+ addChainDependencies(SU, Itr->second,
+ Val2SUsMap.getTrueMemOrderLatency());
+}
+
+void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
+ assert (BarrierChain != nullptr);
+
+ for (auto &I : map) {
+ SUList &sus = I.second;
+ for (auto *SU : sus)
+ SU->addPredBarrier(BarrierChain);
+ }
+ map.clear();
+}
+
+void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
+ assert (BarrierChain != nullptr);
+
+ // Go through all lists of SUs.
+ for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
+ Value2SUsMap::iterator CurrItr = I++;
+ SUList &sus = CurrItr->second;
+ SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
+ for (; SUItr != SUEE; ++SUItr) {
+ // Stop on BarrierChain or any instruction above it.
+ if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
+ break;
+
+ (*SUItr)->addPredBarrier(BarrierChain);
+ }
+
+ // Also remove the BarrierChain from the list if present.
+ if (SUItr != SUEE && *SUItr == BarrierChain)
+ SUItr++;
+
+ // Remove all SUs that are now successors of BarrierChain.
+ if (SUItr != sus.begin())
+ sus.erase(sus.begin(), SUItr);
+ }
+
+ // Remove all entries with empty SU lists.
+ map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
+ return (mapEntry.second.empty()); });
+
+ // Recompute the size of the map (NumNodes).
+ map.reComputeSize();
+}
+
+/// If RPTracker is non-null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+ RegPressureTracker *RPTracker,
+ PressureDiffs *PDiffs,
+ LiveIntervals *LIS,
+ bool TrackLaneMasks) {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
+ : ST.useAA();
+ AAForDep = UseAA ? AA : nullptr;
+
+ BarrierChain = nullptr;
+
+ this->TrackLaneMasks = TrackLaneMasks;
+ MISUnitMap.clear();
+ ScheduleDAG::clearDAG();
+
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
+ if (PDiffs)
+ PDiffs->init(SUnits.size());
+
+ // We build scheduling units by walking a block's instruction list
+ // from bottom to top.
+
+ // Each MI's memory operand(s) is analyzed to a list of underlying
+ // objects. The SU is then inserted in the SUList(s) mapped from the
+ // Value(s). Each Value thus gets mapped to lists of SUs depending
+ // on it, stores and loads kept separately. Two SUs are trivially
+ // non-aliasing if they both depend on only identified Values and do
+ // not share any common Value.
+ Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+
+ // Certain memory accesses are known to not alias any SU in Stores
+ // or Loads, and have therefore their own 'NonAlias'
+ // domain. E.g. spill / reload instructions never alias LLVM IR
+ // Values. It would be nice to assume that this type of memory
+ // accesses always have a proper memory operand modelling, and are
+ // therefore never unanalyzable, but this is conservatively not
+ // done.
+ Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
+
+ // Remove any stale debug info; sometimes BuildSchedGraph is called again
+ // without emitting the info from the previous call.
+ DbgValues.clear();
+ FirstDbgValue = nullptr;
+
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setUniverse(TRI->getNumRegs());
+ Uses.setUniverse(TRI->getNumRegs());
+
+ assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
+ assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
+ unsigned NumVirtRegs = MRI.getNumVirtRegs();
+ CurrentVRegDefs.setUniverse(NumVirtRegs);
+ CurrentVRegUses.setUniverse(NumVirtRegs);
+
+ VRegUses.clear();
+ VRegUses.setUniverse(NumVirtRegs);
+
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ addSchedBarrierDeps();
+
+ // Walk the list of instructions, from bottom moving up.
+ MachineInstr *DbgMI = nullptr;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (DbgMI) {
+ DbgValues.push_back(std::make_pair(DbgMI, &MI));
+ DbgMI = nullptr;
+ }
+
+ if (MI.isDebugValue()) {
+ DbgMI = &MI;
+ continue;
+ }
+ SUnit *SU = MISUnitMap[&MI];
+ assert(SU && "No SUnit mapped to this MI");
+
+ if (RPTracker) {
+ collectVRegUses(SU);
+
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
+ if (TrackLaneMasks) {
+ SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
+ }
+ if (PDiffs != nullptr)
+ PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
+
+ RPTracker->recedeSkipDebugValues();
+ assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
+ RPTracker->recede(RegOpers);
+ }
+
+ assert(
+ (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
+ "Cannot schedule terminators or labels!");
+
+ // Add register-based dependencies (data, anti, and output).
+ // For some instructions (calls, returns, inline-asm, etc.) there can
+ // be explicit uses and implicit defs, in which case the use will appear
+ // on the operand list before the def. Do two passes over the operand
+ // list to make sure that defs are processed before any uses.
+ bool HasVRegDef = false;
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else {
+ HasVRegDef = true;
+ addVRegDefDeps(SU, j);
+ }
+ }
+ // Now process all uses.
+ for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
+ const MachineOperand &MO = MI.getOperand(j);
+ // Only look at use operands.
+ // We do not need to check for MO.readsReg() here because subsequent
+ // subregister defs will get output dependence edges and need no
+ // additional use dependencies.
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
+ }
+
+ // If we haven't seen any uses in this scheduling region, create a
+ // dependence edge to ExitSU to model the live-out latency. This is required
+ // for vreg defs with no in-region use, and prefetches with no vreg def.
+ //
+ // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
+ // check currently relies on being called before adding chain deps.
+ if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
+ SDep Dep(SU, SDep::Artificial);
+ Dep.setLatency(SU->Latency - 1);
+ ExitSU.addPred(Dep);
+ }
+
+ // Add memory dependencies (Note: isStoreToStackSlot and
+ // isLoadFromStackSlot are not usable after stack slots are lowered to
+ // actual addresses).
+
+ // This is a barrier event that acts as a pivotal node in the DAG.
+ if (isGlobalMemoryObject(AA, &MI)) {
+
+ // Become the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+ BarrierChain = SU;
+
+ DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+
+ // Add dependencies against everything below it and clear maps.
+ addBarrierChain(Stores);
+ addBarrierChain(Loads);
+ addBarrierChain(NonAliasStores);
+ addBarrierChain(NonAliasLoads);
+
+ continue;
+ }
+
+ // If it's not a store or a variant load, we're done.
+ if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA)))
+ continue;
+
+ // Always add a dependency edge to BarrierChain if present.
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ // Find the underlying objects for MI. The Objs vector is either
+ // empty, or filled with the Values of memory locations which this
+ // SU depends on. An empty vector means the memory location is
+ // unknown, and may alias anything.
+ UnderlyingObjectsVector Objs;
+ getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());
+
+ if (MI.mayStore()) {
+ if (Objs.empty()) {
+ // An unknown store depends on all stores and loads.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+ addChainDependencies(SU, Loads);
+ addChainDependencies(SU, NonAliasLoads);
+
+ // Map this store to 'UnknownValue'.
+ Stores.insert(SU, UnknownValue);
+ } else {
+ // Add precise dependencies against all previously seen memory
+ // accesses mapped to the same Value(s).
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add dependencies to previous stores and loads mapped to V.
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+ addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
+ }
+ // Update the store map after all chains have been added to avoid adding
+ // a self-loop edge if multiple underlying objects are present.
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Map this store to V.
+ (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
+ }
+ // The store may have dependencies to unanalyzable loads and
+ // stores.
+ addChainDependencies(SU, Loads, UnknownValue);
+ addChainDependencies(SU, Stores, UnknownValue);
+ }
+ } else { // SU is a load.
+ if (Objs.empty()) {
+ // An unknown load depends on all stores.
+ addChainDependencies(SU, Stores);
+ addChainDependencies(SU, NonAliasStores);
+
+ Loads.insert(SU, UnknownValue);
+ } else {
+ for (const UnderlyingObject &UnderlObj : Objs) {
+ ValueType V = UnderlObj.getValue();
+ bool ThisMayAlias = UnderlObj.mayAlias();
+
+ // Add precise dependencies against all previously seen stores
+ // mapping to the same Value(s).
+ addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+
+ // Map this load to V.
+ (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
+ }
+ // The load may have dependencies to unanalyzable stores.
+ addChainDependencies(SU, Stores, UnknownValue);
+ }
+ }
+
+ // Reduce maps if they grow huge.
+ if (Stores.size() + Loads.size() >= HugeRegion) {
+ DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
+ reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
+ }
+ if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
+ DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
+ reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
+ }
+ }
+
+ if (DbgMI)
+ FirstDbgValue = DbgMI;
+
+ Defs.clear();
+ Uses.clear();
+ CurrentVRegDefs.clear();
+ CurrentVRegUses.clear();
+}
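+
+// Sketch of the expected driver sequence around buildSchedGraph (the real
+// call sites live in the machine schedulers; shown here only to tie the
+// pieces above together):
+//   DAG->startBlock(MBB);
+//   DAG->enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
+//   DAG->buildSchedGraph(AA, RPTracker, PDiffs, LIS, TrackLaneMasks);
+//   // ... schedule the SUnits ...
+//   DAG->exitRegion();
+//   DAG->finishBlock();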
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
+ PSV->printCustom(OS);
+ return OS;
+}
+
+void ScheduleDAGInstrs::Value2SUsMap::dump() {
+ for (auto &Itr : *this) {
+ if (Itr.first.is<const Value*>()) {
+ const Value *V = Itr.first.get<const Value*>();
+ if (isa<UndefValue>(V))
+ dbgs() << "Unknown";
+ else
+ V->printAsOperand(dbgs());
+ }
+ else if (Itr.first.is<const PseudoSourceValue*>())
+ dbgs() << Itr.first.get<const PseudoSourceValue*>();
+ else
+ llvm_unreachable("Unknown Value type.");
+
+ dbgs() << " : ";
+ dumpSUList(Itr.second);
+ }
+}
+
+/// Reduce maps in FIFO order, by N SUs. This is better than turning
+/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
+/// it avoids unnecessary edges between seen SUs above the new
+/// BarrierChain, and those below it.
+void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
+ Value2SUsMap &loads, unsigned N) {
+ DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
+ stores.dump();
+ dbgs() << "Loading SUnits:\n";
+ loads.dump());
+
+ // Insert all SU's NodeNums into a vector and sort it.
+ std::vector<unsigned> NodeNums;
+ NodeNums.reserve(stores.size() + loads.size());
+ for (auto &I : stores)
+ for (auto *SU : I.second)
+ NodeNums.push_back(SU->NodeNum);
+ for (auto &I : loads)
+ for (auto *SU : I.second)
+ NodeNums.push_back(SU->NodeNum);
+ std::sort(NodeNums.begin(), NodeNums.end());
+
+ // The last N elements in NodeNums will be removed, and the SU with
+ // the lowest NodeNum among them will become the new BarrierChain to
+ // let the not-yet-seen SUs have a dependency on the removed SUs.
+ assert (N <= NodeNums.size());
+ SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
+ if (BarrierChain) {
+ // The aliasing and non-aliasing maps reduce independently of each
+ // other, but share a common BarrierChain. Check if the
+ // newBarrierChain is above the former one. If it is not, it may
+ // introduce a loop to use newBarrierChain, so keep the old one.
+ if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
+ BarrierChain->addPredBarrier(newBarrierChain);
+ BarrierChain = newBarrierChain;
+ DEBUG(dbgs() << "Inserting new barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+ }
+ else
+ DEBUG(dbgs() << "Keeping old barrier chain: SU("
+ << BarrierChain->NodeNum << ").\n";);
+ }
+ else
+ BarrierChain = newBarrierChain;
+
+ insertBarrierChain(stores);
+ insertBarrierChain(loads);
+
+ DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
+ stores.dump();
+ dbgs() << "Loading SUnits:\n";
+ loads.dump());
+}
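+
+// Worked example (illustrative numbers): with N = 2 and sorted NodeNums of
+// {3, 5, 7, 9, 12}, *(NodeNums.end() - N) is 9, so SUnits[9] becomes the
+// candidate newBarrierChain. insertBarrierChain() then gives SU(12) a barrier
+// edge to SU(9), removes both from the maps, and SUs visited later depend
+// only on the new BarrierChain rather than on each removed SU.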
+
+/// \brief Initialize register live-range state for updating kills.
+void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
+ // Start with no live registers.
+ LiveRegs.reset();
+
+ // Examine the live-in regs of all successors.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI) {
+ for (const auto &LI : (*SI)->liveins()) {
+ // Repeat, for reg and all subregs.
+ for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+/// \brief If we change a kill flag on the bundle instruction's implicit register
+/// operands, then we also need to propagate that to any instructions inside
+/// the bundle which had the same kill state.
+static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
+ bool NewKillState,
+ const TargetRegisterInfo *TRI) {
+ if (MI->getOpcode() != TargetOpcode::BUNDLE)
+ return;
+
+ // Walk backwards from the last instruction in the bundle to the first.
+ // Once we set a kill flag on an instruction, we bail out, as otherwise we
+ // might set it on too many operands. We will clear as many flags as we
+ // can though.
+ MachineBasicBlock::instr_iterator Begin = MI->getIterator();
+ MachineBasicBlock::instr_iterator End = getBundleEnd(*MI);
+ while (Begin != End) {
+ if (NewKillState) {
+ if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ return;
+ } else
+ (--End)->clearRegisterKills(Reg, TRI);
+ }
+}
+
+bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
+ // Setting kill flag...
+ if (!MO.isKill()) {
+ MO.setIsKill(true);
+ toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
+ return false;
+ }
+
+ // If MO itself is live, clear the kill flag...
+ if (LiveRegs.test(MO.getReg())) {
+ MO.setIsKill(false);
+ toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
+ return false;
+ }
+
+ // If any subreg of MO is live, then create an imp-def for that
+ // subreg and keep MO marked as killed.
+ MO.setIsKill(false);
+ toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
+ bool AllDead = true;
+ const unsigned SuperReg = MO.getReg();
+ MachineInstrBuilder MIB(MF, MI);
+ for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ MIB.addReg(*SubRegs, RegState::ImplicitDefine);
+ AllDead = false;
+ }
+ }
+
+ if (AllDead) {
+ MO.setIsKill(true);
+ toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
+ }
+ return false;
+}
+
+// FIXME: Reuse the LivePhysRegs utility for this.
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+ LiveRegs.resize(TRI->getNumRegs());
+ BitVector killedRegs(TRI->getNumRegs());
+
+ startBlockForKills(MBB);
+
+ // Examine block from end to start...
+ unsigned Count = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+ I != E; --Count) {
+ MachineInstr &MI = *--I;
+ if (MI.isDebugValue())
+ continue;
+
+ // Update liveness. Registers that are defined but not used in this
+ // instruction are now dead. Clear the register and all of its subregs
+ // from the live set, since they are completely defined.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI.isRegTiedToUseOperand(i)) continue;
+
+ // Repeat for reg and all subregs.
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ LiveRegs.reset(*SubRegs);
+ }
+
+ // Examine all used registers and set/clear kill flag. When a
+ // register is used multiple times we only set the kill flag on
+ // the first use. Don't set kill flags on undef operands.
+ killedRegs.reset();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ bool kill = false;
+ if (!killedRegs.test(Reg)) {
+ kill = true;
+ // A register is not killed if any subregs are live...
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+ if (LiveRegs.test(*SubRegs)) {
+ kill = false;
+ break;
+ }
+ }
+
+ // If no subreg is live, then the register is killed if it became
+ // live in this instruction.
+ if (kill)
+ kill = !LiveRegs.test(Reg);
+ }
+
+ if (MO.isKill() != kill) {
+ DEBUG(dbgs() << "Fixing " << MO << " in ");
+ // Warning: toggleKillFlag may invalidate MO.
+ toggleKillFlag(&MI, MO);
+ DEBUG(MI.dump());
+ DEBUG({
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ MachineBasicBlock::instr_iterator Begin = MI.getIterator();
+ MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ while (++Begin != End)
+ DEBUG(Begin->dump());
+ }
+ });
+ }
+
+ killedRegs.set(Reg);
+ }
+
+ // Mark any used register (that is not using undef) and subregs as
+ // now live...
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+ unsigned Reg = MO.getReg();
+ if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ LiveRegs.set(*SubRegs);
+ }
+ }
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ SU->getInstr()->dump();
+#endif
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ if (SU == &EntrySU)
+ oss << "<entry>";
+ else if (SU == &ExitSU)
+ oss << "<exit>";
+ else
+ SU->getInstr()->print(oss, /*SkipOpers=*/true);
+ return oss.str();
+}
+
+/// Return the basic block label. It is not necessarily unique because a block
+/// contains multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
+}
+
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+/// \brief Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+ SchedDFSResult &R;
+
+ /// Join DAG nodes into equivalence classes by their subtree.
+ IntEqClasses SubtreeClasses;
+ /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+ std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+
+ struct RootData {
+ unsigned NodeID;
+ unsigned ParentNodeID; // Parent node (member of the parent subtree).
+ unsigned SubInstrCount; // Instr count in this tree only, not children.
+
+ RootData(unsigned id): NodeID(id),
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID),
+ SubInstrCount(0) {}
+
+ unsigned getSparseSetIndex() const { return NodeID; }
+ };
+
+ SparseSet<RootData> RootSet;
+
+public:
+ SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
+ RootSet.setUniverse(R.DFSNodeData.size());
+ }
+
+ /// Return true if this node has been visited by the DFS traversal.
+ ///
+ /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
+ /// ID. Later, SubtreeID is updated but remains valid.
+ bool isVisited(const SUnit *SU) const {
+ return R.DFSNodeData[SU->NodeNum].SubtreeID
+ != SchedDFSResult::InvalidSubtreeID;
+ }
+
+ /// Initialize this node's instruction count. We don't need to flag the node
+ /// visited until visitPostorder because the DAG cannot have cycles.
+ void visitPreorder(const SUnit *SU) {
+ R.DFSNodeData[SU->NodeNum].InstrCount =
+ SU->getInstr()->isTransient() ? 0 : 1;
+ }
+
+ /// Called once for each node after all predecessors are visited. Revisit this
+ /// node's predecessors and potentially join them now that we know the ILP of
+ /// the other predecessors.
+ void visitPostorderNode(const SUnit *SU) {
+ // Mark this node as the root of a subtree. It may be joined with its
+ // successors later.
+ R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
+ RootData RData(SU->NodeNum);
+ RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+
+ // If any predecessors are still in their own subtree, they either cannot be
+ // joined or are large enough to remain separate. If this parent node's
+ // total instruction count is not greater than a child subtree by at least
+ // the subtree limit, then try to join it now since splitting subtrees is
+ // only useful if multiple high-pressure paths are possible.
+ unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
+ for (SUnit::const_pred_iterator
+ PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Data)
+ continue;
+ unsigned PredNum = PI->getSUnit()->NodeNum;
+ if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
+ joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+
+ // Either link or merge the TreeData entry from the child to the parent.
+ if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+ // If the predecessor's parent is invalid, this is a tree edge and the
+ // current node is the parent.
+ if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+ RootSet[PredNum].ParentNodeID = SU->NodeNum;
+ }
+ else if (RootSet.count(PredNum)) {
+ // The predecessor is not a root, but is still in the root set. This
+ // must be the new parent that it was just joined to. Note that
+ // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+ // set to the original parent.
+ RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
+ RootSet.erase(PredNum);
+ }
+ }
+ RootSet[SU->NodeNum] = RData;
+ }
+
+ /// Called once for each tree edge after calling visitPostorderNode on the
+ /// predecessor. Increment the parent node's instruction count and
+ /// preemptively join this subtree to its parent's if it is small enough.
+ void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
+ R.DFSNodeData[Succ->NodeNum].InstrCount
+ += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
+ joinPredSubtree(PredDep, Succ);
+ }
+
+ /// Add a connection for cross edges.
+ void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
+ ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+ }
+
+ /// Set each node's subtree ID to the representative ID and record connections
+ /// between trees.
+ void finalize() {
+ SubtreeClasses.compress();
+ R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
+ assert(SubtreeClasses.getNumClasses() == RootSet.size()
+ && "number of roots should match trees");
+ for (SparseSet<RootData>::const_iterator
+ RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
+ unsigned TreeID = SubtreeClasses[RI->NodeID];
+ if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ // Note that SubInstrCount may be greater than InstrCount if we joined
+ // subtrees across a cross edge. InstrCount will be attributed to the
+ // original parent, while SubInstrCount will be attributed to the joined
+ // parent.
+ }
+ R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+ R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+ DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+ for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
+ R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
+ DEBUG(dbgs() << " SU(" << Idx << ") in tree "
+ << R.DFSNodeData[Idx].SubtreeID << '\n');
+ }
+ for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
+ I = ConnectionPairs.begin(), E = ConnectionPairs.end();
+ I != E; ++I) {
+ unsigned PredTree = SubtreeClasses[I->first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ if (PredTree == SuccTree)
+ continue;
+ unsigned Depth = I->first->getDepth();
+ addConnection(PredTree, SuccTree, Depth);
+ addConnection(SuccTree, PredTree, Depth);
+ }
+ }
+
+protected:
+ /// Join the predecessor subtree with the successor that is its DFS
+ /// parent. Apply some heuristics before joining.
+ bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
+ bool CheckLimit = true) {
+ assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
+
+ // Check if the predecessor is already joined.
+ const SUnit *PredSU = PredDep.getSUnit();
+ unsigned PredNum = PredSU->NodeNum;
+ if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
+ return false;
+
+ // Four is the magic number of successors before a node is considered a
+ // pinch point.
+ unsigned NumDataSucs = 0;
+ for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
+ SE = PredSU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data) {
+ if (++NumDataSucs >= 4)
+ return false;
+ }
+ }
+ if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
+ return false;
+ R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
+ SubtreeClasses.join(Succ->NodeNum, PredNum);
+ return true;
+ }
+
+ /// Called by finalize() to record a connection between trees.
+ void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+ if (!Depth)
+ return;
+
+ do {
+ SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+ R.SubtreeConnections[FromTree];
+ for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+ I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+ if (I->TreeID == ToTree) {
+ I->Level = std::max(I->Level, Depth);
+ return;
+ }
+ }
+ Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+ FromTree = R.DFSTreeData[FromTree].ParentTreeID;
+ } while (FromTree != SchedDFSResult::InvalidSubtreeID);
+ }
+};
+} // namespace llvm
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+ std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+ bool isComplete() const { return DFSStack.empty(); }
+
+ void follow(const SUnit *SU) {
+ DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+ }
+ void advance() { ++DFSStack.back().second; }
+
+ const SDep *backtrack() {
+ DFSStack.pop_back();
+ return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
+ }
+
+ const SUnit *getCurr() const { return DFSStack.back().first; }
+
+ SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+ SUnit::const_pred_iterator getPredEnd() const {
+ return getCurr()->Preds.end();
+ }
+};
+} // anonymous
+
+static bool hasDataSucc(const SUnit *SU) {
+ for (SUnit::const_succ_iterator
+ SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
+ if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ return true;
+ }
+ return false;
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
+ if (!IsBottomUp)
+ llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+ SchedDFSImpl Impl(*this);
+ for (ArrayRef<SUnit>::const_iterator
+ SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+ const SUnit *SU = &*SI;
+ if (Impl.isVisited(SU) || hasDataSucc(SU))
+ continue;
+
+ SchedDAGReverseDFS DFS;
+ Impl.visitPreorder(SU);
+ DFS.follow(SU);
+ for (;;) {
+ // Traverse the leftmost path as far as possible.
+ while (DFS.getPred() != DFS.getPredEnd()) {
+ const SDep &PredDep = *DFS.getPred();
+ DFS.advance();
+ // Ignore non-data edges.
+ if (PredDep.getKind() != SDep::Data
+ || PredDep.getSUnit()->isBoundaryNode()) {
+ continue;
+ }
+ // An already visited edge is a cross edge, assuming an acyclic DAG.
+ if (Impl.isVisited(PredDep.getSUnit())) {
+ Impl.visitCrossEdge(PredDep, DFS.getCurr());
+ continue;
+ }
+ Impl.visitPreorder(PredDep.getSUnit());
+ DFS.follow(PredDep.getSUnit());
+ }
+ // Visit the top of the stack in postorder and backtrack.
+ const SUnit *Child = DFS.getCurr();
+ const SDep *PredDep = DFS.backtrack();
+ Impl.visitPostorderNode(Child);
+ if (PredDep)
+ Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
+ if (DFS.isComplete())
+ break;
+ }
+ }
+ Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+ for (SmallVectorImpl<Connection>::const_iterator
+ I = SubtreeConnections[SubtreeID].begin(),
+ E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
+ SubtreeConnectLevels[I->TreeID] =
+ std::max(SubtreeConnectLevels[I->TreeID], I->Level);
+ DEBUG(dbgs() << " Tree: " << I->TreeID
+ << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
+ }
+}
+
+LLVM_DUMP_METHOD
+void ILPValue::print(raw_ostream &OS) const {
+ OS << InstrCount << " / " << Length << " = ";
+ if (!Length)
+ OS << "BADILP";
+ else
+ OS << format("%g", ((double)InstrCount / Length));
+}
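+
+// Example of the format above (illustrative): a node with InstrCount = 8 and
+// Length = 4 prints as "8 / 4 = 2", i.e. an ILP value of two instructions per
+// unit of path length; a zero Length prints "BADILP" instead.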
+
+LLVM_DUMP_METHOD
+void ILPValue::dump() const {
+ dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+LLVM_DUMP_METHOD
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+ Val.print(OS);
+ return OS;
+}
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 000000000000..ca2881cb91e0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,100 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const ScheduleDAG *G) {
+ return G->MF.getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static bool isNodeHidden(const SUnit *Node) {
+ return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+ }
+
+ static std::string getNodeIdentifierLabel(const SUnit *Node,
+ const ScheduleDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+ OS << static_cast<const void *>(Node);
+ return R;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ static std::string getEdgeAttributes(const SUnit *Node,
+ SUnitIterator EI,
+ const ScheduleDAG *Graph) {
+ if (EI.isArtificialDep())
+ return "color=cyan,style=dashed";
+ if (EI.isCtrlDep())
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
+ static std::string getNodeAttributes(const SUnit *N,
+ const ScheduleDAG *Graph) {
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(ScheduleDAG *G,
+ GraphWriter<ScheduleDAG*> &GW) {
+ return G->addCustomGraphFeatures(GW);
+ }
+ };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+ const ScheduleDAG *G) {
+ return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
new file mode 100644
index 000000000000..69c487033015
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -0,0 +1,240 @@
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE DebugType
+
+ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType)
+ : ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
+ DAG(SchedDAG), IssueWidth(0), IssueCount(0) {
+
+ // Determine the maximum depth of any itinerary. This determines the depth of
+ // the scoreboard. We always make the scoreboard at least 1 cycle deep to
+ // avoid dealing with the boundary condition.
+ unsigned ScoreboardDepth = 1;
+ if (ItinData && !ItinData->isEmpty()) {
+ for (unsigned idx = 0; ; ++idx) {
+ if (ItinData->isEndMarker(idx))
+ break;
+
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
+ unsigned ItinDepth = 0;
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
+
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ // Don't set MaxLookAhead until we find at least one nonzero stage.
+ // This way, an itinerary with no stages has MaxLookAhead==0, which
+ // completely bypasses the scoreboard hazard logic.
+ MaxLookAhead = ScoreboardDepth;
+ }
+ }
+ }
+
+ ReservedScoreboard.reset(ScoreboardDepth);
+ RequiredScoreboard.reset(ScoreboardDepth);
+
+ // If MaxLookAhead is not set above, then we are not enabled.
+ if (!isEnabled())
+ DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n");
+ else {
+ // A nonempty itinerary must have a SchedModel.
+ IssueWidth = ItinData->SchedModel.IssueWidth;
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
+ << ScoreboardDepth << '\n');
+ }
+}
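+
+// Worked example with made-up numbers: for an itinerary class whose stages
+// have (Cycles, NextCycles) = (2, 1) and (4, 0), the stage depths are
+// 0 + 2 = 2 and 1 + 4 = 5, so ItinDepth = 5 and ScoreboardDepth is doubled
+// from 1 up to 8, the next power of two, which also becomes MaxLookAhead.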
+
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
+ RequiredScoreboard.reset();
+ ReservedScoreboard.reset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ScoreboardHazardRecognizer::Scoreboard::dump() const {
+ dbgs() << "Scoreboard:\n";
+
+ unsigned last = Depth - 1;
+ while ((last > 0) && ((*this)[last] == 0))
+ last--;
+
+ for (unsigned i = 0; i <= last; i++) {
+ unsigned FUs = (*this)[i];
+ dbgs() << "\t";
+ for (int j = 31; j >= 0; j--)
+ dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+ dbgs() << '\n';
+ }
+}
+#endif
+
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
+ScheduleHazardRecognizer::HazardType
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
+ return NoHazard;
+
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
+
+ // Use the itinerary for the underlying instruction to check for
+ // free FU's in the scoreboard at the appropriate future cycles.
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must find one of the stage's units free for every cycle the
+ // stage is occupied. FIXME: it would be more accurate to find the
+ // same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[StageCycle];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[StageCycle];
+ break;
+ }
+
+ if (!freeUnits) {
+ DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", ");
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+ return Hazard;
+ }
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ return NoHazard;
+}
+
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
+ return;
+
+ // Use the itinerary for the underlying instruction to reserve FU's
+ // in the scoreboard at the appropriate future cycles.
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = MCID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
+ // We must reserve one of the stage's units for every cycle the
+ // stage is occupied. FIXME: it would be more accurate to reserve
+ // the same unit free in all the cycles.
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) {
+ assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
+ "Scoreboard depth exceeded!");
+
+ unsigned freeUnits = IS->getUnits();
+ switch (IS->getReservationKind()) {
+ case InstrStage::Required:
+ // Required FUs conflict with both reserved and required ones
+ freeUnits &= ~ReservedScoreboard[cycle + i];
+ // FALLTHROUGH
+ case InstrStage::Reserved:
+ // Reserved FUs can conflict only with required ones.
+ freeUnits &= ~RequiredScoreboard[cycle + i];
+ break;
+ }
+
+ // reduce to a single unit
+ unsigned freeUnit = 0;
+ do {
+ freeUnit = freeUnits;
+ freeUnits = freeUnit & (freeUnit - 1);
+ } while (freeUnits);
+
+ if (IS->getReservationKind() == InstrStage::Required)
+ RequiredScoreboard[cycle + i] |= freeUnit;
+ else
+ ReservedScoreboard[cycle + i] |= freeUnit;
+ }
+
+ // Advance the cycle to the next stage.
+ cycle += IS->getNextCycles();
+ }
+
+ DEBUG(ReservedScoreboard.dump());
+ DEBUG(RequiredScoreboard.dump());
+}
+
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
+ RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
+}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
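+
+// A minimal top-down usage sketch (illustrative only; `Itins`, `SchedDAG` and
+// `Sequence` are assumed to be provided by the surrounding scheduler):
+//
+//   ScoreboardHazardRecognizer HR(Itins, SchedDAG, "my-sched");
+//   for (SUnit *SU : Sequence) {
+//     while (HR.atIssueLimit() ||
+//            HR.getHazardType(SU, 0) != ScheduleHazardRecognizer::NoHazard)
+//       HR.AdvanceCycle();
+//     HR.EmitInstruction(SU);
+//   }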
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
new file mode 100644
index 000000000000..d888676583f3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -0,0 +1,15009 @@
+//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
+// both before and after the DAG is legalized.
+//
+// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
+// primarily intended to handle simplification opportunities that are implicit
+// in the LLVM IR and exposed by the various codegen lowering phases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "dagcombine"
+
+STATISTIC(NodesCombined , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
+STATISTIC(SlicedLoads, "Number of load sliced");
+
+namespace {
+ static cl::opt<bool>
+ CombinerAA("combiner-alias-analysis", cl::Hidden,
+ cl::desc("Enable DAG combiner alias-analysis heuristics"));
+
+ static cl::opt<bool>
+ CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
+
+ static cl::opt<bool>
+ UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+ cl::desc("Enable DAG combiner's use of TBAA"));
+
+#ifndef NDEBUG
+ static cl::opt<std::string>
+ CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
+#endif
+
+ /// Hidden option to stress test load slicing, i.e., when this option
+ /// is enabled, load slicing bypasses most of its profitability guards.
+ static cl::opt<bool>
+ StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load "
+ "slicing"),
+ cl::init(false));
+
+ static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+ class DAGCombiner {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ CombineLevel Level;
+ CodeGenOpt::Level OptLevel;
+ bool LegalOperations;
+ bool LegalTypes;
+ bool ForCodeSize;
+
+ /// \brief Worklist of all of the nodes that need to be simplified.
+ ///
+ /// This must behave as a stack -- new nodes to process are pushed onto the
+ /// back and when processing we pop off of the back.
+ ///
+ /// The worklist will not contain duplicates but may contain null entries
+ /// due to nodes being deleted from the underlying DAG.
+ SmallVector<SDNode *, 64> Worklist;
+
+ /// \brief Mapping from an SDNode to its position on the worklist.
+ ///
+ /// This is used to find and remove nodes from the worklist (by nulling
+ /// them) when they are deleted from the underlying DAG. It relies on
+ /// stable indices of nodes within the worklist.
+ DenseMap<SDNode *, unsigned> WorklistMap;
+
+ /// \brief Set of nodes which have been combined (at least once).
+ ///
+ /// This is used to allow us to reliably add any operands of a DAG node
+ /// which have not yet been combined to the worklist.
+ SmallPtrSet<SDNode *, 32> CombinedNodes;
+
+ // AA - Used for DAG load/store alias analysis.
+ AliasAnalysis &AA;
+
+ /// When an instruction is simplified, add all users of the instruction to
+ /// the work lists because they might get more simplified now.
+ void AddUsersToWorklist(SDNode *N) {
+ for (SDNode *Node : N->uses())
+ AddToWorklist(Node);
+ }
+
+ /// Call the node-specific routine that folds each particular type of node.
+ SDValue visit(SDNode *N);
+
+ public:
+ /// Add to the worklist, making sure its instance is at the back (next to
+ /// be processed).
+ void AddToWorklist(SDNode *N) {
+ // Skip handle nodes as they can't usefully be combined and confuse the
+ // zero-use deletion strategy.
+ if (N->getOpcode() == ISD::HANDLENODE)
+ return;
+
+ if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+ Worklist.push_back(N);
+ }
+
+ /// Remove all instances of N from the worklist.
+ void removeFromWorklist(SDNode *N) {
+ CombinedNodes.erase(N);
+
+ auto It = WorklistMap.find(N);
+ if (It == WorklistMap.end())
+ return; // Not in the worklist.
+
+ // Null out the entry rather than erasing it to avoid a linear operation.
+ Worklist[It->second] = nullptr;
+ WorklistMap.erase(It);
+ }
+
+ void deleteAndRecombine(SDNode *N);
+ bool recursivelyDeleteUnusedNodes(SDNode *N);
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo = true);
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+ return CombineTo(N, &Res, 1, AddTo);
+ }
+
+ /// Replaces all uses of the results of one DAG node with new values.
+ SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+ bool AddTo = true) {
+ SDValue To[] = { Res0, Res1 };
+ return CombineTo(N, To, 2, AddTo);
+ }
+
+ void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+ private:
+
+ /// Check the specified integer node value to see if it can be simplified or
+ /// if things it uses can be simplified by bit propagation.
+ /// If so, return true.
+ bool SimplifyDemandedBits(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, Demanded);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+
+ bool CombineToPreIndexedLoadStore(SDNode *N);
+ bool CombineToPostIndexedLoadStore(SDNode *N);
+ SDValue SplitIndexingFromLoad(LoadSDNode *LD);
+ bool SliceUpLoad(SDNode *N);
+
+ /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+ /// \returns EVE on success, SDValue() on failure.
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
+ void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
+ SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
+ SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
+ SDValue PromoteIntBinOp(SDValue Op);
+ SDValue PromoteIntShiftOp(SDValue Op);
+ SDValue PromoteExtend(SDValue Op);
+ bool PromoteLoad(SDValue Op);
+
+ void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
+ SDValue ExtLoad, const SDLoc &DL,
+ ISD::NodeType ExtType);
+
+ /// Call the node-specific routine that knows how to fold each
+ /// particular type of node. If that doesn't do anything, try the
+ /// target-specific DAG combines.
+ SDValue combine(SDNode *N);
+
+ // Visitation implementation - Implement dag node combining for different
+ // node types. The semantics are as follows:
+ // Return Value:
+ // SDValue.getNode() == 0 - No change was made
+ // SDValue.getNode() == N - N was replaced, is dead and has been handled.
+ // otherwise - N should be replaced by the returned Operand.
+ //
+ SDValue visitTokenFactor(SDNode *N);
+ SDValue visitMERGE_VALUES(SDNode *N);
+ SDValue visitADD(SDNode *N);
+ SDValue visitSUB(SDNode *N);
+ SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
+ SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
+ SDValue visitMUL(SDNode *N);
+ SDValue useDivRem(SDNode *N);
+ SDValue visitSDIV(SDNode *N);
+ SDValue visitUDIV(SDNode *N);
+ SDValue visitREM(SDNode *N);
+ SDValue visitMULHU(SDNode *N);
+ SDValue visitMULHS(SDNode *N);
+ SDValue visitSMUL_LOHI(SDNode *N);
+ SDValue visitUMUL_LOHI(SDNode *N);
+ SDValue visitSMULO(SDNode *N);
+ SDValue visitUMULO(SDNode *N);
+ SDValue visitIMINMAX(SDNode *N);
+ SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitXOR(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N);
+ SDValue visitSHL(SDNode *N);
+ SDValue visitSRA(SDNode *N);
+ SDValue visitSRL(SDNode *N);
+ SDValue visitRotate(SDNode *N);
+ SDValue visitBSWAP(SDNode *N);
+ SDValue visitBITREVERSE(SDNode *N);
+ SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
+ SDValue visitCTPOP(SDNode *N);
+ SDValue visitSELECT(SDNode *N);
+ SDValue visitVSELECT(SDNode *N);
+ SDValue visitSELECT_CC(SDNode *N);
+ SDValue visitSETCC(SDNode *N);
+ SDValue visitSETCCE(SDNode *N);
+ SDValue visitSIGN_EXTEND(SDNode *N);
+ SDValue visitZERO_EXTEND(SDNode *N);
+ SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
+ SDValue visitTRUNCATE(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
+ SDValue visitBUILD_PAIR(SDNode *N);
+ SDValue visitFADD(SDNode *N);
+ SDValue visitFSUB(SDNode *N);
+ SDValue visitFMUL(SDNode *N);
+ SDValue visitFMA(SDNode *N);
+ SDValue visitFDIV(SDNode *N);
+ SDValue visitFREM(SDNode *N);
+ SDValue visitFSQRT(SDNode *N);
+ SDValue visitFCOPYSIGN(SDNode *N);
+ SDValue visitSINT_TO_FP(SDNode *N);
+ SDValue visitUINT_TO_FP(SDNode *N);
+ SDValue visitFP_TO_SINT(SDNode *N);
+ SDValue visitFP_TO_UINT(SDNode *N);
+ SDValue visitFP_ROUND(SDNode *N);
+ SDValue visitFP_ROUND_INREG(SDNode *N);
+ SDValue visitFP_EXTEND(SDNode *N);
+ SDValue visitFNEG(SDNode *N);
+ SDValue visitFABS(SDNode *N);
+ SDValue visitFCEIL(SDNode *N);
+ SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFLOOR(SDNode *N);
+ SDValue visitFMINNUM(SDNode *N);
+ SDValue visitFMAXNUM(SDNode *N);
+ SDValue visitBRCOND(SDNode *N);
+ SDValue visitBR_CC(SDNode *N);
+ SDValue visitLOAD(SDNode *N);
+
+ SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
+ SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+
+ SDValue visitSTORE(SDNode *N);
+ SDValue visitINSERT_VECTOR_ELT(SDNode *N);
+ SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue visitBUILD_VECTOR(SDNode *N);
+ SDValue visitCONCAT_VECTORS(SDNode *N);
+ SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
+ SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
+ SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitMLOAD(SDNode *N);
+ SDValue visitMSTORE(SDNode *N);
+ SDValue visitMGATHER(SDNode *N);
+ SDValue visitMSCATTER(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+ SDValue visitFP16_TO_FP(SDNode *N);
+
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
+ SDValue visitFMULForFMACombine(SDNode *N);
+
+ SDValue XformToShuffleWithZero(SDNode *N);
+ SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
+ SDValue RHS);
+
+ SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+
+ bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
+ SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
+ SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
+ SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare = false);
+ SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+ const SDLoc &DL, bool foldBooleans = true);
+
+ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const;
+ bool isOneUseSetCC(SDValue N) const;
+
+ SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp);
+ SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue CombineExtLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue BuildSDIV(SDNode *N);
+ SDValue BuildSDIVPow2(SDNode *N);
+ SDValue BuildUDIV(SDNode *N);
+ SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
+ SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
+ SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal);
+ SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+ SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue InnerPos, SDValue InnerNeg,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL);
+ SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
+ SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
+ SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+
+ SDValue GetDemandedBits(SDValue V, const APInt &Mask);
+
+ /// Walk up chain skipping non-aliasing memory nodes,
+ /// looking for aliasing nodes and adding them to the Aliases vector.
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVectorImpl<SDValue> &Aliases);
+
+ /// Return true if there is any possibility that the two addresses overlap.
+ bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
+
+ /// Walk up chain skipping non-aliasing memory nodes, looking for a better
+ /// chain (aliasing node).
+ SDValue FindBetterChain(SDNode *N, SDValue Chain);
+
+ /// Try to replace a store and any possibly adjacent stores on
+ /// consecutive chains with better chains. Return true only if St is
+ /// replaced.
+ ///
+ /// Notice that other chains may still be replaced even if the function
+ /// returns false.
+ bool findBetterNeighborChains(StoreSDNode *St);
+
+ /// Match "(X shl/srl V1) & V2" where V2 may not be present.
+ bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
+
+ /// Holds a pointer to an LSBaseSDNode as well as information on where it
+ /// is located in a sequence of memory operations connected by a chain.
+ struct MemOpLink {
+ MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
+ MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ // Ptr to the mem node.
+ LSBaseSDNode *MemNode;
+ // Offset from the base ptr.
+ int64_t OffsetFromBase;
+ // The sequence number of this mem node. The lowest mem operand in the
+ // DAG starts at zero.
+ unsigned SequenceNum;
+ };
+
+ /// This is a helper function for visitMUL to check the profitability
+ /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// MulNode is the original multiply, AddNode is (add x, c1),
+ /// and ConstNode is c2.
+ bool isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode);
+
+ /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
+ /// constant build_vector of the stored constant values in Stores.
+ SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
+ ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains,
+ EVT Ty) const;
+
+ /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
+ /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
+ /// the type of the loaded value to be extended. LoadedVT returns the type
+ /// of the original loaded value. NarrowLoad returns whether the load would
+ /// need to be narrowed in order to match.
+ bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad);
+
+ /// This is a helper function for MergeConsecutiveStores. When the source
+ /// elements of the consecutive stores are all constants or all extracted
+ /// vector elements, try to merge them into one larger store.
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
+
+ /// This is a helper function for MergeConsecutiveStores.
+ /// Stores that may be merged are placed in StoreNodes.
+ /// Loads that may alias with those stores are placed in AliasLoadNodes.
+ void getStoreMergeAndAliasCandidates(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+
+ /// Helper function for MergeConsecutiveStores. Checks if
+ /// the candidate stores have an indirect dependency through their
+ /// operands. \return True if it is safe to merge.
+ bool checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes);
+
+ /// Merge consecutive store operations into a wide store.
+ /// This optimization uses wide integers or vectors when possible.
+ /// \return True if some memory operations were changed.
+ bool MergeConsecutiveStores(StoreSDNode *N);
+
+ /// \brief Try to transform a truncation where C is a constant:
+ /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
+ ///
+ /// \p N needs to be a truncation and its first operand an AND. Other
+ /// requirements are checked by the function (e.g. that the truncation is
+ /// single-use); if they are not met, an empty SDValue is returned.
+ SDValue distributeTruncateThroughAnd(SDNode *N);
+
+ public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
+ ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
+ }
+
+ /// Runs the dag combiner on all nodes in the worklist.
+ void Run(CombineLevel AtLevel);
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+ /// Returns a type large enough to hold any valid shift amount - before type
+ /// legalization these can be huge.
+ EVT getShiftAmountTy(EVT LHSTy) {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ auto &DL = DAG.getDataLayout();
+ return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
+ : TLI.getPointerTy(DL);
+ }
+
+ /// This method returns true if we are running before type legalization or
+ /// if the specified VT is legal.
+ bool isTypeLegal(const EVT &VT) {
+ if (!LegalTypes) return true;
+ return TLI.isTypeLegal(VT);
+ }
+
+ /// Convenience wrapper around TargetLowering::getSetCCResultType
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+ };
+}
+
+
+namespace {
+/// This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class WorklistRemover : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+public:
+ explicit WorklistRemover(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ DC.removeFromWorklist(N);
+ }
+};
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering::DAGCombinerInfo implementation
+//===----------------------------------------------------------------------===//
+
+void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->AddToWorklist(N);
+}
+
+void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
+ ((DAGCombiner*)DC)->removeFromWorklist(N);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
+}
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
+}
+
+
+SDValue TargetLowering::DAGCombinerInfo::
+CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
+ return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
+}
+
+void TargetLowering::DAGCombinerInfo::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper Functions
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::deleteAndRecombine(SDNode *N) {
+ removeFromWorklist(N);
+
+ // If the operands of this node are only used by the node, they will now be
+ // dead. Make sure to re-visit them and recursively delete dead nodes.
+ for (const SDValue &Op : N->ops())
+ // For an operand generating multiple values, one of the values may
+ // become dead allowing further simplification (e.g. split index
+ // arithmetic from an indexed load).
+ if (Op->hasOneUse() || Op->getNumValues() > 1)
+ AddToWorklist(Op.getNode());
+
+ DAG.DeleteNode(N);
+}
+
+/// Return 1 if we can compute the negated form of the specified expression for
+/// the same cost as the expression itself, 2 if we can compute the negated
+/// form more cheaply than the expression itself, and 0 otherwise.
+static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
+ unsigned Depth = 0) {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return 2;
+
+ // Don't allow anything with multiple uses.
+ if (!Op.hasOneUse()) return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > 6) return 0;
+
+ switch (Op.getOpcode()) {
+ default: return 0;
+ case ISD::ConstantFP:
+ // Don't invert constant FP values after legalize. The negated constant
+ // isn't necessarily legal.
+ return LegalOperations ? 0 : 1;
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options->UnsafeFPMath) return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
+ return V;
+
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
+ }
+}
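+
+// For example, isNegatibleForFree returns 2 for (fneg X), since negating it
+// simply yields X, and (with unsafe FP math) returns 1 for (fsub A, B), since
+// its negation can be formed as (fsub B, A) at the same cost.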
+
+/// If isNegatibleForFree returned a nonzero value, return the newly negated
+/// expression.
+static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, unsigned Depth = 0) {
+ const TargetOptions &Options = DAG.getTarget().Options;
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+
+ // Don't allow anything with multiple uses.
+ assert(Op.hasOneUse() && "Unknown reuse!");
+
+ assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Unknown code");
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+ }
+ case ISD::FADD:
+ // FIXME: determine better conditions for this xform.
+ assert(Options.UnsafeFPMath);
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1), Flags);
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(0), Flags);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ assert(Options.UnsafeFPMath);
+
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (N0CFP->isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0), Flags);
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ assert(!Options.HonorSignDependentRoundingFPMath());
+
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1), Flags);
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0),
+ GetNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, Depth+1), Flags);
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, Depth+1),
+ Op.getOperand(1));
+ }
+}
+
+// Return true if this node is a setcc, or is a select_cc
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const {
+ if (N.getOpcode() == ISD::SETCC) {
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(2);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SELECT_CC ||
+ !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
+ !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+ return false;
+
+ if (TLI.getBooleanContents(N.getValueType()) ==
+ TargetLowering::UndefinedBooleanContent)
+ return false;
+
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
+}
+
+/// Return true if this is a SetCC-equivalent operation with only one use.
+/// If this is true, it allows the users to invert the operation for free when
+/// it is profitable to do so.
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
+ SDValue N0, N1, N2;
+ if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ return true;
+ return false;
+}
+
+// \brief Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// int.
+static ConstantSDNode *isConstOrConstSplat(SDValue N) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here.
+ // FIXME: We blindly ignore splats which include undef which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
+ return CN;
+ }
+
+ return nullptr;
+}
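+
+// For example, a splat like (build_vector 7, 7, 7, 7) yields the ConstantSDNode
+// for its splat value, while a splat containing undef elements or truncated
+// operands returns null.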
+
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// float.
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+ if (CN && UndefElements.none())
+ return CN;
+ }
+
+ return nullptr;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == Opc) {
+ if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
+ }
+ if (N0.hasOneUse()) {
+ // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
+ // use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+ if (!OpNode.getNode())
+ return SDValue();
+ AddToWorklist(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ }
+ }
+ }
+
+ if (N1.getOpcode() == Opc) {
+ if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
+ if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
+ return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ return SDValue();
+ }
+ if (N1.hasOneUse()) {
+ // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
+ // use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
+ if (!OpNode.getNode())
+ return SDValue();
+ AddToWorklist(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+ }
+ }
+ }
+
+ return SDValue();
+}
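+
+// For example, with Opc == ISD::ADD this rewrites (add (add x, 1), 2) as
+// (add x, 3) by folding the two constants, and (add (add x, c), y) as
+// (add (add x, y), c) when the inner add has a single use.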
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+ bool AddTo) {
+ assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.1 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ To[0].getNode()->dump(&DAG);
+ dbgs() << " and " << NumTo-1 << " other values\n");
+ for (unsigned i = 0, e = NumTo; i != e; ++i)
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
+ "Cannot combine value to value of different type!");
+
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesWith(N, To);
+ if (AddTo) {
+ // Push the new nodes and any users onto the worklist
+ for (unsigned i = 0, e = NumTo; i != e; ++i) {
+ if (To[i].getNode()) {
+ AddToWorklist(To[i].getNode());
+ AddUsersToWorklist(To[i].getNode());
+ }
+ }
+ }
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (N->use_empty())
+ deleteAndRecombine(N);
+ return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+ // Replace all uses. If any nodes become isomorphic to other nodes and
+ // are deleted, make sure to remove them from our worklist.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
+
+ // Push the new node and any (possibly new) users onto the worklist.
+ AddToWorklist(TLO.New.getNode());
+ AddUsersToWorklist(TLO.New.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node.
+ if (TLO.Old.getNode()->use_empty())
+ deleteAndRecombine(TLO.Old.getNode());
+}
+
+/// Check the specified integer node value to see if it can be simplified or if
+/// things it uses can be simplified by bit propagation. If so, return true.
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+ TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+ APInt KnownZero, KnownOne;
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ return false;
+
+ // Revisit the node.
+ AddToWorklist(Op.getNode());
+
+ // Replace the old value with the new one.
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.2 ";
+ TLO.Old.getNode()->dump(&DAG);
+ dbgs() << "\nWith: ";
+ TLO.New.getNode()->dump(&DAG);
+ dbgs() << '\n');
+
+ CommitTargetLoweringOpt(TLO);
+ return true;
+}
+
+void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
+ SDLoc dl(Load);
+ EVT VT = Load->getValueType(0);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+
+ DEBUG(dbgs() << "\nReplacing.9 ";
+ Load->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Trunc.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
+ deleteAndRecombine(Load);
+ AddToWorklist(Trunc.getNode());
+}
+
+SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
+ Replace = false;
+ SDLoc dl(Op);
+ if (ISD::isUNINDEXEDLoad(Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ Replace = true;
+ return DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ MemVT, LD->getMemOperand());
+ }
+
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ default: break;
+ case ISD::AssertSext:
+ return DAG.getNode(ISD::AssertSext, dl, PVT,
+ SExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::AssertZext:
+ return DAG.getNode(ISD::AssertZext, dl, PVT,
+ ZExtPromoteOperand(Op.getOperand(0), PVT),
+ Op.getOperand(1));
+ case ISD::Constant: {
+ unsigned ExtOpc =
+ Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOpc, dl, PVT, Op);
+ }
+ }
+
+ if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
+ return SDValue();
+ return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+}
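+
+// For example, promoting an unindexed, non-extending i16 load to i32 yields an
+// i32 zextload of the i16 memory value when ZEXTLOAD is legal for that
+// combination, and an i32 extload otherwise; plain values fall back to an
+// ANY_EXTEND when the target supports it.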
+
+SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
+ if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
+ return SDValue();
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (!NewOp.getNode())
+ return SDValue();
+ AddToWorklist(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ DAG.getValueType(OldVT));
+}
+
+SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ bool Replace = false;
+ SDValue NewOp = PromoteOperand(Op, PVT, Replace);
+ if (!NewOp.getNode())
+ return SDValue();
+ AddToWorklist(NewOp.getNode());
+
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
+ return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+}
+
+/// Promote the specified integer binary operation if the target indicates it is
+/// beneficial; e.g. on x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace0 = false;
+ SDValue N0 = Op.getOperand(0);
+ SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
+ if (!NN0.getNode())
+ return SDValue();
+
+ bool Replace1 = false;
+ SDValue N1 = Op.getOperand(1);
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (!NN1.getNode())
+ return SDValue();
+ }
+
+ AddToWorklist(NN0.getNode());
+ if (NN1.getNode())
+ AddToWorklist(NN1.getNode());
+
+ if (Replace0)
+ ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
+ if (Replace1)
+ ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ SDLoc dl(Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ }
+ return SDValue();
+}
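+
+// For example, if the target reports that i16 ADD is undesirable and should be
+// promoted to i32, an (i16 add a, b) becomes
+// (i16 trunc (i32 add (promote a), (promote b))), where each operand is
+// widened via PromoteOperand (typically an ANY_EXTEND, or an extending load).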
+
+/// Promote the specified integer shift operation if the target indicates it is
+/// beneficial; e.g. on x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
+SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ bool Replace = false;
+ SDValue N0 = Op.getOperand(0);
+ if (Opc == ISD::SRA)
+ N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ else if (Opc == ISD::SRL)
+ N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ else
+ N0 = PromoteOperand(N0, PVT, Replace);
+ if (!N0.getNode())
+ return SDValue();
+
+ AddToWorklist(N0.getNode());
+ if (Replace)
+ ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
+
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ SDLoc dl(Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::PromoteExtend(SDValue Op) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return SDValue();
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ DEBUG(dbgs() << "\nPromoting ";
+ Op.getNode()->dump(&DAG));
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
+ }
+ return SDValue();
+}
+
+bool DAGCombiner::PromoteLoad(SDValue Op) {
+ if (!LegalOperations)
+ return false;
+
+ if (!ISD::isUNINDEXEDLoad(Op.getNode()))
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector() || !VT.isInteger())
+ return false;
+
+ // If operation type is 'undesirable', e.g. i16 on x86, consider
+ // promoting it.
+ unsigned Opc = Op.getOpcode();
+ if (TLI.isTypeDesirableForOp(Opc, VT))
+ return false;
+
+ EVT PVT = VT;
+ // Consult target whether it is a good idea to promote this operation and
+ // what's the right type to promote it to.
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
+ assert(PVT != VT && "Don't know what type to promote to!");
+
+ SDLoc dl(Op);
+ SDNode *N = Op.getNode();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT MemVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
+ : LD->getExtensionType();
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ LD->getChain(), LD->getBasePtr(),
+ MemVT, LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+
+ DEBUG(dbgs() << "\nPromoting ";
+ N->dump(&DAG);
+ dbgs() << "\nTo: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
+ deleteAndRecombine(N);
+ AddToWorklist(Result.getNode());
+ return true;
+ }
+ return false;
+}
+
+/// \brief Recursively delete a node which has no uses and any operands for
+/// which it is the only use.
+///
+/// Note that this both deletes the nodes and removes them from the worklist.
+/// It also adds any nodes that have had a user deleted to the worklist, as
+/// they may now have only one use and be subject to other combines.
+bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
+ if (!N->use_empty())
+ return false;
+
+ SmallSetVector<SDNode *, 16> Nodes;
+ Nodes.insert(N);
+ do {
+ N = Nodes.pop_back_val();
+ if (!N)
+ continue;
+
+ if (N->use_empty()) {
+ for (const SDValue &ChildN : N->op_values())
+ Nodes.insert(ChildN.getNode());
+
+ removeFromWorklist(N);
+ DAG.DeleteNode(N);
+ } else {
+ AddToWorklist(N);
+ }
+ } while (!Nodes.empty());
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Main DAG Combiner implementation
+//===----------------------------------------------------------------------===//
+
+void DAGCombiner::Run(CombineLevel AtLevel) {
+ // Set the instance variables so that the various visit routines may use them.
+ Level = AtLevel;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
+
+ // Add all the dag nodes to the worklist.
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node);
+
+ // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+
+ // While the worklist isn't empty, find a node and try to combine it.
+ while (!WorklistMap.empty()) {
+ SDNode *N;
+ // The Worklist holds the SDNodes in order, but it may contain null entries.
+ do {
+ N = Worklist.pop_back_val();
+ } while (!N);
+
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
+
+ // If N has no uses, it is dead. Make sure to revisit all N's operands once
+ // N is deleted from the DAG, since they too may now be dead or may have a
+ // reduced number of uses, allowing other xforms.
+ if (recursivelyDeleteUnusedNodes(N))
+ continue;
+
+ WorklistRemover DeadNodes(*this);
+
+ // If this combine is running after legalizing the DAG, re-legalize any
+ // nodes pulled off the worklist.
+ if (Level == AfterLegalizeDAG) {
+ SmallSetVector<SDNode *, 16> UpdatedNodes;
+ bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
+
+ for (SDNode *LN : UpdatedNodes) {
+ AddToWorklist(LN);
+ AddUsersToWorklist(LN);
+ }
+ if (!NIsValid)
+ continue;
+ }
+
+ DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+
+ // Add any operands of the new node which have not yet been combined to the
+ // worklist as well. Because the worklist uniques things already, this
+ // won't repeatedly process the same operand.
+ CombinedNodes.insert(N);
+ for (const SDValue &ChildN : N->op_values())
+ if (!CombinedNodes.count(ChildN.getNode()))
+ AddToWorklist(ChildN.getNode());
+
+ SDValue RV = combine(N);
+
+ if (!RV.getNode())
+ continue;
+
+ ++NodesCombined;
+
+ // If we get back the same node we passed in, rather than a new node or
+ // zero, we know that the node must have defined multiple values and
+ // CombineTo was used. Since CombineTo takes care of the worklist
+ // mechanics for us, we have no work to do in this case.
+ if (RV.getNode() == N)
+ continue;
+
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned new node!");
+
+ DEBUG(dbgs() << " ... into: ";
+ RV.getNode()->dump(&DAG));
+
+ if (N->getNumValues() == RV.getNode()->getNumValues())
+ DAG.ReplaceAllUsesWith(N, RV.getNode());
+ else {
+ assert(N->getValueType(0) == RV.getValueType() &&
+ N->getNumValues() == 1 && "Type mismatch");
+ SDValue OpV = RV;
+ DAG.ReplaceAllUsesWith(N, &OpV);
+ }
+
+ // Push the new node and any users onto the worklist
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
+
+ // Finally, if the node is now dead, remove it from the graph. The node
+ // may not be dead if the replacement process recursively simplified to
+ // something else needing this node. This will also take care of adding any
+ // operands which have lost a user to the worklist.
+ recursivelyDeleteUnusedNodes(N);
+ }
+
+ // If the root changed (e.g. it was a dead load), update the root.
+ DAG.setRoot(Dummy.getValue());
+ DAG.RemoveDeadNodes();
+}
+
+SDValue DAGCombiner::visit(SDNode *N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: return visitTokenFactor(N);
+ case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
+ case ISD::ADD: return visitADD(N);
+ case ISD::SUB: return visitSUB(N);
+ case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
+ case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
+ case ISD::MUL: return visitMUL(N);
+ case ISD::SDIV: return visitSDIV(N);
+ case ISD::UDIV: return visitUDIV(N);
+ case ISD::SREM:
+ case ISD::UREM: return visitREM(N);
+ case ISD::MULHU: return visitMULHU(N);
+ case ISD::MULHS: return visitMULHS(N);
+ case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
+ case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
+ case ISD::SMULO: return visitSMULO(N);
+ case ISD::UMULO: return visitUMULO(N);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: return visitIMINMAX(N);
+ case ISD::AND: return visitAND(N);
+ case ISD::OR: return visitOR(N);
+ case ISD::XOR: return visitXOR(N);
+ case ISD::SHL: return visitSHL(N);
+ case ISD::SRA: return visitSRA(N);
+ case ISD::SRL: return visitSRL(N);
+ case ISD::ROTR:
+ case ISD::ROTL: return visitRotate(N);
+ case ISD::BSWAP: return visitBSWAP(N);
+ case ISD::BITREVERSE: return visitBITREVERSE(N);
+ case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
+ case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
+ case ISD::CTPOP: return visitCTPOP(N);
+ case ISD::SELECT: return visitSELECT(N);
+ case ISD::VSELECT: return visitVSELECT(N);
+ case ISD::SELECT_CC: return visitSELECT_CC(N);
+ case ISD::SETCC: return visitSETCC(N);
+ case ISD::SETCCE: return visitSETCCE(N);
+ case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
+ case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
+ case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
+ case ISD::TRUNCATE: return visitTRUNCATE(N);
+ case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
+ case ISD::FADD: return visitFADD(N);
+ case ISD::FSUB: return visitFSUB(N);
+ case ISD::FMUL: return visitFMUL(N);
+ case ISD::FMA: return visitFMA(N);
+ case ISD::FDIV: return visitFDIV(N);
+ case ISD::FREM: return visitFREM(N);
+ case ISD::FSQRT: return visitFSQRT(N);
+ case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
+ case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
+ case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
+ case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
+ case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
+ case ISD::FP_ROUND: return visitFP_ROUND(N);
+ case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
+ case ISD::FP_EXTEND: return visitFP_EXTEND(N);
+ case ISD::FNEG: return visitFNEG(N);
+ case ISD::FABS: return visitFABS(N);
+ case ISD::FFLOOR: return visitFFLOOR(N);
+ case ISD::FMINNUM: return visitFMINNUM(N);
+ case ISD::FMAXNUM: return visitFMAXNUM(N);
+ case ISD::FCEIL: return visitFCEIL(N);
+ case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::BRCOND: return visitBRCOND(N);
+ case ISD::BR_CC: return visitBR_CC(N);
+ case ISD::LOAD: return visitLOAD(N);
+ case ISD::STORE: return visitSTORE(N);
+ case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
+ case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
+ case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
+ case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
+ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
+ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
+ case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MGATHER: return visitMGATHER(N);
+ case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSCATTER: return visitMSCATTER(N);
+ case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
+ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ }
+ return SDValue();
+}
+
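+/// Try to simplify \p N: run the generic visit combines first, then any
+/// target-specific combines, then integer-promotion rewrites, and finally try
+/// a commuted-operand CSE lookup for commutative binary nodes.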
+SDValue DAGCombiner::combine(SDNode *N) {
+ SDValue RV = visit(N);
+
+ // If nothing happened, try a target-specific DAG combine.
+ if (!RV.getNode()) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Node was deleted but visit returned NULL!");
+
+ if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
+ TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
+
+ // Expose the DAG combiner to the target combiner impls.
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+
+ RV = TLI.PerformDAGCombine(N, DagCombineInfo);
+ }
+ }
+
+ // If nothing happened still, try promoting the operation.
+ if (!RV.getNode()) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ RV = PromoteIntBinOp(SDValue(N, 0));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ RV = PromoteIntShiftOp(SDValue(N, 0));
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ RV = PromoteExtend(SDValue(N, 0));
+ break;
+ case ISD::LOAD:
+ if (PromoteLoad(SDValue(N, 0)))
+ RV = SDValue(N, 0);
+ break;
+ }
+ }
+
+ // If N is a commutative binary node, try commuting it to enable more
+ // sdisel CSE.
+ if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ N->getNumValues() == 1) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Constant operands are canonicalized to RHS.
+ if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ N->getFlags());
+ if (CSENode)
+ return SDValue(CSENode, 0);
+ }
+ }
+
+ return RV;
+}
+
+/// Given a node, return its input chain if it has one, otherwise return a
+/// null SDValue.
+static SDValue getInputChainForNode(SDNode *N) {
+ if (unsigned NumOps = N->getNumOperands()) {
+ if (N->getOperand(0).getValueType() == MVT::Other)
+ return N->getOperand(0);
+ if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
+ return N->getOperand(NumOps-1);
+ for (unsigned i = 1; i < NumOps-1; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ return N->getOperand(i);
+ }
+ return SDValue();
+}
+
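+/// Flatten a tree of TokenFactor nodes: nested single-use token factors are
+/// merged into this one, while entry tokens and duplicate operands are
+/// dropped, so the resulting TokenFactor carries each unique chain once.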
+SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
+ // If N has two operands, where one has an input chain equal to the other,
+ // the 'other' chain is redundant.
+ if (N->getNumOperands() == 2) {
+ if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+ return N->getOperand(0);
+ if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+ return N->getOperand(1);
+ }
+
+ SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallPtrSet<SDNode*, 16> SeenOps;
+ bool Changed = false; // If we should replace this token factor.
+
+ // Start out with this token factor.
+ TFs.push_back(N);
+
+ // Iterate through token factors. The TFs list grows when new token factors
+ // are encountered.
+ for (unsigned i = 0; i < TFs.size(); ++i) {
+ SDNode *TF = TFs[i];
+
+ // Check each of the operands.
+ for (const SDValue &Op : TF->op_values()) {
+
+ switch (Op.getOpcode()) {
+ case ISD::EntryToken:
+ // Entry tokens don't need to be added to the list. They are
+ // redundant.
+ Changed = true;
+ break;
+
+ case ISD::TokenFactor:
+ if (Op.hasOneUse() &&
+ std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ // Queue up for processing.
+ TFs.push_back(Op.getNode());
+ // Clean up in case the token factor is removed.
+ AddToWorklist(Op.getNode());
+ Changed = true;
+ break;
+ }
+ // Fall through.
+
+ default:
+ // Only add if it isn't already in the list.
+ if (SeenOps.insert(Op.getNode()).second)
+ Ops.push_back(Op);
+ else
+ Changed = true;
+ break;
+ }
+ }
+ }
+
+ SDValue Result;
+
+ // If we've changed things around then replace token factor.
+ if (Changed) {
+ if (Ops.empty()) {
+ // The entry token is the only possible outcome.
+ Result = DAG.getEntryNode();
+ } else {
+ // New and improved token factor.
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ }
+
+ // Add users to worklist if AA is enabled, since it may introduce
+ // a lot of new chained token factors while removing memory deps.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ return CombineTo(N, Result, UseAA /*add to worklist*/);
+ }
+
+ return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+ WorklistRemover DeadNodes(*this);
+ // Replacing results may cause a different MERGE_VALUES to suddenly
+ // be CSE'd with N, and carry its uses with it. Iterate until no
+ // uses remain, to ensure that the node can be safely deleted.
+ // First add the users of this node to the work list so that they
+ // can be tried again once they have new operands.
+ AddUsersToWorklist(N);
+ do {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
+ } while (!N->use_empty());
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+/// If \p N is a non-opaque ConstantSDNode, return it cast to a
+/// ConstantSDNode pointer; otherwise return nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+ return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (add x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ }
+
+ // fold (add x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ // canonicalize constant to RHS
+ if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
+ // fold (add c1, c2) -> c1+c2
+ return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
+ }
+ // fold (add x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+ // fold ((c1-A)+c2) -> (c1+c2)-A
+ if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (N0.getOpcode() == ISD::SUB)
+ if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(N1C->getAPIntValue()+
+ N0C->getAPIntValue(), DL, VT),
+ N0.getOperand(1));
+ }
+ }
+ // reassociate add
+ if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
+ return RADD;
+ // fold ((0-A) + B) -> B-A
+ if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
+ // fold (A + (0-B)) -> A-B
+ if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
+ // fold (A+(B-A)) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+ return N1.getOperand(0);
+ // fold ((B-A)+A) -> B
+ if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+ return N0.getOperand(0);
+ // fold (A+(B-(A+C))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(0))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(1));
+ // fold (A+(B-(C+A))) to (B-C)
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+ N0 == N1.getOperand(1).getOperand(1))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ N1.getOperand(1).getOperand(0));
+ // fold (A+((B-A)+or-C)) to (B+or-C)
+ if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+ N1.getOperand(0).getOpcode() == ISD::SUB &&
+ N0 == N1.getOperand(0).getOperand(1))
+ return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
+ N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+ // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N10 = N1.getOperand(0);
+ SDValue N11 = N1.getOperand(1);
+
+ if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
+ DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
+ }
+
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (a+b) -> (a|b) iff a and b share no bits.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+ VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+
+ // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
+
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
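+ // (sext i1) is 0 or -1 while (zext i1) is 0 or 1, so adding the
+ // sign-extended value is the same as subtracting the zero-extended one.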
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
+ // add X, (sextinreg Y i1) -> sub X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ SDLoc(N), MVT::Glue));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
+
+ // fold (addc x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
+ SDLoc(N), MVT::Glue));
+
+ // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
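+ // LHSZero/RHSZero are the bits known to be zero in each operand; if every
+ // bit that may be set in one operand is known zero in the other, no bit
+ // position can carry, so the add is an OR and the carry out is false.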
+ APInt LHSZero, LHSOne;
+ APInt RHSZero, RHSOne;
+ DAG.computeKnownBits(N0, LHSZero, LHSOne);
+
+ if (LHSZero.getBoolValue()) {
+ DAG.computeKnownBits(N1, RHSZero, RHSOne);
+
+ // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+ // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
+ return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE,
+ SDLoc(N), MVT::Glue));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADDE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
+ N1, N0, CarryIn);
+
+ // fold (adde x, y, false) -> (addc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check if we can before folding.
+static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations,
+ bool LegalTypes) {
+ if (!VT.isVector())
+ return DAG.getConstant(0, DL, VT);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return DAG.getConstant(0, DL, VT);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (sub x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
+ if (N0 == N1)
+ return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // fold (sub c1, c2) -> c1-c2
+ return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
+ }
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ // fold (sub x, c) -> (add x, -c)
+ if (N1C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+ if (isAllOnesConstant(N0))
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
+ // fold (A+B)-A -> B
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
+ return N0.getOperand(1);
+ // fold (A+B)-B -> A
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
+ return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDLoc DL(N);
+ SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
+ DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC,
+ N1.getOperand(0));
+ }
+ // fold ((A+(B+or-C))-B) -> A+or-C
+ if (N0.getOpcode() == ISD::ADD &&
+ (N0.getOperand(1).getOpcode() == ISD::SUB ||
+ N0.getOperand(1).getOpcode() == ISD::ADD) &&
+ N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ // fold ((A+(C+B))-B) -> A+C
+ if (N0.getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOpcode() == ISD::ADD &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ // fold ((A-(B-C))-C) -> A-B
+ if (N0.getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOpcode() == ISD::SUB &&
+ N0.getOperand(1).getOperand(1) == N1)
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1).getOperand(0));
+
+ // If either operand of a sub is undef, the result is undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+
+ // If the relocation model supports it, consider symbol offsets.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ // fold (sub Sym, c) -> Sym-c
+ if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+ return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
+ GA->getOffset() -
+ (uint64_t)N1C->getSExtValue());
+ // fold (sub Sym+c1, Sym+c2) -> c1-c2
+ if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+ if (GA->getGlobal() == GB->getGlobal())
+ return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+ SDLoc(N), VT);
+ }
+
+ // sub X, (sextinreg Y i1) -> add X, (and Y 1)
+ if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
+ if (TN->getVT() == MVT::i1) {
+ SDLoc DL(N);
+ SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold (mul x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ bool N0IsConst = false;
+ bool N1IsConst = false;
+ bool N1IsOpaqueConst = false;
+ bool N0IsOpaqueConst = false;
+ APInt ConstValue0, ConstValue1;
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
+ N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
+ } else {
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst) {
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
+ }
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst) {
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+ }
+ }
+
+ // fold (mul c1, c2) -> c1*c2
+ if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
+ return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
+
+ // canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+ // fold (mul x, 0) -> 0
+ if (N1IsConst && ConstValue1 == 0)
+ return N1;
+ // We require a splat of the entire scalar bit width for non-contiguous
+ // bit patterns.
+ bool IsFullSplat =
+ ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
+ // fold (mul x, 1) -> x
+ if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
+ return N0;
+ // fold (mul x, -1) -> 0-x
+ if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
+ // fold (mul x, (1 << c)) -> x << c
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
+ IsFullSplat) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ConstValue1.logBase2(), DL,
+ getShiftAmountTy(N0.getValueType())));
+ }
+ // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
+ IsFullSplat) {
+ unsigned Log2Val = (-ConstValue1).logBase2();
+ SDLoc DL(N);
+ // FIXME: If the input is something that is easily negated (e.g. a
+ // single-use add), we should put the negate there.
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL,
+ getShiftAmountTy(N0.getValueType()))));
+ }
+
+ APInt Val;
+ // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+ if (N1IsConst && N0.getOpcode() == ISD::SHL &&
+ (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
+ isa<ConstantSDNode>(N0.getOperand(1)))) {
+ SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
+ AddToWorklist(C3.getNode());
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ }
+
+ // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+ // use.
+ {
+ SDValue Sh(nullptr, 0), Y(nullptr, 0);
+ // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
+ if (N0.getOpcode() == ISD::SHL &&
+ (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
+ isa<ConstantSDNode>(N0.getOperand(1))) &&
+ N0.getNode()->hasOneUse()) {
+ Sh = N0; Y = N1;
+ } else if (N1.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getNode()->hasOneUse()) {
+ Sh = N1; Y = N0;
+ }
+
+ if (Sh.getNode()) {
+ SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
+ }
+ }
+
+ // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
+ N0.getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
+ isMulAddWithConstProfitable(N, N0, N1))
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT,
+ N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT,
+ N0.getOperand(1), N1));
+
+ // reassociate mul
+ if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
+ return RMUL;
+
+ return SDValue();
+}
+
+/// Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+ const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ EVT NodeType = Node->getValueType(0);
+ if (!NodeType.isSimple())
+ return false;
+ switch (NodeType.getSimpleVT().SimpleTy) {
+ default: return false; // No libcall for vector types.
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Issue divrem if both quotient and remainder are needed.
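+/// Walk the users of the first operand; any matching div/rem of the same
+/// operands is rewritten to use the corresponding result of a single
+/// [SU]DIVREM node instead of being computed separately.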
+SDValue DAGCombiner::useDivRem(SDNode *Node) {
+ if (Node->use_empty())
+ return SDValue(); // This is a dead node, leave it alone.
+
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+
+ // DivMod lib calls can still work on non-legal types if using lib-calls.
+ EVT VT = Node->getValueType(0);
+ if (VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
+ return SDValue();
+
+ // If DIVREM is going to get expanded into a libcall,
+ // but there is no libcall available, then don't combine.
+ if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
+ !isDivRemLibcallAvailable(Node, isSigned, TLI))
+ return SDValue();
+
+ // If div is legal, it's better to do the normal expansion
+ unsigned OtherOpcode = 0;
+ if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
+ OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
+ if (TLI.isOperationLegalOrCustom(Opcode, VT))
+ return SDValue();
+ } else {
+ OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
+ return SDValue();
+ }
+
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue combined;
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node || User->use_empty())
+ continue;
+ // Convert the other matching node(s), too;
+ // otherwise, the DIVREM may get target-legalized into something
+ // target-specific that we won't be able to recognize.
+ unsigned UserOpc = User->getOpcode();
+ if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
+ User->getOperand(0) == Op0 &&
+ User->getOperand(1) == Op1) {
+ if (!combined) {
+ if (UserOpc == OtherOpcode) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
+ } else if (UserOpc == DivRemOpc) {
+ combined = SDValue(User, 0);
+ } else {
+ assert(UserOpc == Opcode);
+ continue;
+ }
+ }
+ if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
+ CombineTo(User, combined);
+ else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
+ CombineTo(User, combined.getValue(1));
+ }
+ }
+ return combined;
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ SDLoc DL(N);
+
+ // fold (sdiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
+ // fold (sdiv X, 1) -> X
+ if (N1C && N1C->isOne())
+ return N0;
+ // fold (sdiv X, -1) -> 0-X
+ if (N1C && N1C->isAllOnesValue())
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
+ }
+
+ // fold (sdiv X, pow2) -> simple ops after legalize
+ // FIXME: We check for the exact bit here because the generic lowering gives
+ // better results in that case. The target-specific lowering should learn how
+ // to handle exact sdivs efficiently.
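+ // The generic sequence below computes x / +/-2^lg2, rounded toward zero:
+ //   sgn = x >>s (bits-1)             (0 or all ones)
+ //   add = x + (sgn >>u (bits-lg2))   (adds 2^lg2-1 only when x < 0)
+ //   res = add >>s lg2                (negated at the end if the divisor < 0)
+ // e.g. for i32, -7 / 4: (-7 + 3) >>s 2 == -1.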
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
+ // Target-specific implementation of sdiv x, pow2.
+ if (SDValue Res = BuildSDIVPow2(N))
+ return Res;
+
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+
+ // Splat the sign bit into the register
+ SDValue SGN =
+ DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorklist(SGN.getNode());
+
+ // Add (N0 < 0) ? abs2 - 1 : 0;
+ SDValue SRL =
+ DAG.getNode(ISD::SRL, DL, VT, SGN,
+ DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
+ getShiftAmountTy(SGN.getValueType())));
+ SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
+ AddToWorklist(SRL.getNode());
+ AddToWorklist(ADD.getNode()); // Divide by pow2
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
+ DAG.getConstant(lg2, DL,
+ getShiftAmountTy(ADD.getValueType())));
+
+ // If we're dividing by a positive value, we're done. Otherwise, we must
+ // negate the result.
+ if (N1C->getAPIntValue().isNonNegative())
+ return SRA;
+
+ AddToWorklist(SRA.getNode());
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+ }
+
+ // If integer divide is expensive and we satisfy the requirements, emit an
+ // alternate sequence. Targets may check function attributes for size/speed
+ // trade-offs.
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
+
+ // sdiv, srem -> sdivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ // undef / X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+ // X / undef -> undef
+ if (N1.isUndef())
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ SDLoc DL(N);
+
+ // fold (udiv c1, c2) -> c1/c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
+ N0C, N1C))
+ return Folded;
+ // fold (udiv x, (1 << c)) -> x >>u c
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
+ return DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
+ getShiftAmountTy(N0.getValueType())));
+
+ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+ if (N1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
+ if (SHC->getAPIntValue().isPowerOf2()) {
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
+ N1.getOperand(1),
+ DAG.getConstant(SHC->getAPIntValue()
+ .logBase2(),
+ DL, ADDVT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
+ }
+ }
+ }
+
+ // fold (udiv x, c) -> alternate
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
+
+ // udiv, urem -> udivrem
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem;
+
+ // undef / X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+ // X / undef -> undef
+ if (N1.isUndef())
+ return N1;
+
+ return SDValue();
+}
+
+// handles ISD::SREM and ISD::UREM
+SDValue DAGCombiner::visitREM(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ bool isSigned = (Opcode == ISD::SREM);
+ SDLoc DL(N);
+
+ // fold (rem c1, c2) -> c1%c2
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
+ return Folded;
+
+ if (isSigned) {
+ // If we know the sign bits of both operands are zero, strength reduce to a
+ // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+ if (!VT.isVector()) {
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
+ }
+ } else {
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ if (N1.getOpcode() == ISD::SHL) {
+ ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
+ if (SHC && SHC->getAPIntValue().isPowerOf2()) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
+ }
+ }
+ }
+
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+
+ // If X/C can be simplified by the division-by-constant logic, lower
+ // X%C to the equivalent of X-X/C*C.
+ // To avoid mangling nodes, this simplification requires that the combine()
+ // call for the speculative DIV must not cause a DIVREM conversion. We guard
+ // against this by skipping the simplification if isIntDivCheap(). When
+ // div is not cheap, combine will not return a DIVREM. Regardless,
+ // checking cheapness here makes sense since the simplification results in
+ // fatter code.
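+ // For example, x %u 10 becomes x - (x /u 10) * 10 when the speculative udiv
+ // is simplified (typically into a multiply-by-magic-constant sequence).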
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
+ AddToWorklist(Div.getNode());
+ SDValue OptimizedDiv = combine(Div.getNode());
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+ assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
+ (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+ AddToWorklist(Mul.getNode());
+ return Sub;
+ }
+ }
+
+ // sdiv, srem -> sdivrem; udiv, urem -> udivrem
+ if (SDValue DivRem = useDivRem(N))
+ return DivRem.getValue(1);
+
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+ // X % undef -> undef
+ if (N1.isUndef())
+ return N1;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (mulhs x, 0) -> 0
+ if (isNullConstant(N1))
+ return N1;
+ // fold (mulhs x, 1) -> (sra x, size(x)-1)
+ if (isOneConstant(N1)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
+ DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ DL,
+ getShiftAmountTy(N0.getValueType())));
+ }
+ // fold (mulhs x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
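+ // e.g. for i32 with a legal i64 multiply:
+ //   mulhs(a, b) == trunc_i32((sext_i64(a) * sext_i64(b)) >>u 32)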
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (mulhu x, 0) -> 0
+ if (isNullConstant(N1))
+ return N1;
+ // fold (mulhu x, 1) -> 0
+ if (isOneConstant(N1))
+ return DAG.getConstant(0, DL, N0.getValueType());
+ // fold (mulhu x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
+ return SDValue();
+}
+
+/// Perform optimizations common to nodes that compute two values. LoOp and
+/// HiOp give the opcodes for the two computations that are being performed.
+/// Return the combined value if a simplification was made.
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+ unsigned HiOp) {
+ // If the high half is not needed, just compute the low half.
+ bool HiExists = N->hasAnyUseOfValue(1);
+ if (!HiExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
+ SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If the low half is not needed, just compute the high half.
+ bool LoExists = N->hasAnyUseOfValue(0);
+ if (!LoExists &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+ SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ return CombineTo(N, Res, Res);
+ }
+
+ // If both halves are used, return as it is.
+ if (LoExists && HiExists)
+ return SDValue();
+
+ // If the two computed results can be simplified separately, separate them.
+ if (LoExists) {
+ SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+ AddToWorklist(Lo.getNode());
+ SDValue LoOpt = combine(Lo.getNode());
+ if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+ return CombineTo(N, LoOpt, LoOpt);
+ }
+
+ if (HiExists) {
+ SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+ AddToWorklist(Hi.getNode());
+ SDValue HiOpt = combine(Hi.getNode());
+ if (HiOpt.getNode() && HiOpt != Hi &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+ return CombineTo(N, HiOpt, HiOpt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // If the type twice as wide is legal, transform the multiply into a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // If the type twice as wide is legal, transform the multiply into a wider
+ // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSMULO(SDNode *N) {
+ // (smulo x, 2) -> (saddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUMULO(SDNode *N) {
+ // (umulo x, 2) -> (uaddo x, x)
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (C2->getAPIntValue() == 2)
+ return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
+ N->getOperand(0), N->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (smin/smax/umin/umax c1, c2) -> folded constant
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
+
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+
+ return SDValue();
+}
+
+/// If this is a binary operator with two operands of the same opcode, try to
+/// simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+ // Bail early if none of these transforms apply.
+ if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+ // For each of OP in AND/OR/XOR:
+ // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+ // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+ // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+ // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+ // do not sink logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
+ EVT Op0VT = N0.getOperand(0).getValueType();
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::BSWAP ||
+ // Avoid infinite looping with PromoteIntBinOp.
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
+ !VT.isVector() &&
+ Op0VT == N1.getOperand(0).getValueType() &&
+ (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorklist(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
+ }
+
+ // For each of OP in SHL/SRL/SRA/AND...
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+ // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+ // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+ N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorklist(ORNode.getNode());
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
+ ORNode, N0.getOperand(1));
+ }
+
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization up until type legalization, before
+ // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST ||
+ N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
+ Level <= AfterLegalizeTypes) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ SDLoc DL(N);
+ // If both incoming values are integers, and the original types are the
+ // same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
+ AddToWorklist(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ // There are other cases where moving the shuffle after the xor/and/or
+ // is profitable even if shuffles don't perform a swizzle.
+ // If both shuffles use the same mask, and both shuffles have the same first
+ // or second operand, then it might still be profitable to move the shuffle
+ // after the xor/and/or operation.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ // Check also that shuffles have only one use to avoid introducing extra
+ // instructions.
+ if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
+ SVN0->getMask().equals(SVN1->getMask())) {
+ SDValue ShOp = N0->getOperand(1);
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
+ if (!LegalTypes)
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
+ else
+ ShOp = SDValue();
+ }
+
+ // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
+ // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
+ // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ N0->getOperand(0), N1->getOperand(0));
+ AddToWorklist(NewNode.getNode());
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
+ SVN0->getMask());
+ }
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0->getOperand(0);
+ if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
+ if (!LegalTypes)
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
+ else
+ ShOp = SDValue();
+ }
+
+ // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
+ // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
+ // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
+ if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
+ SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ N0->getOperand(1), N1->getOperand(1));
+ AddToWorklist(NewNode.getNode());
+ return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
+ SVN0->getMask());
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+
+ // fold (and x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(LocReference), VT);
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ if (isAllOnesConstant(LR)) {
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ }
+ }
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
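+ // This is safe because (lshr y, c2) has its top c2 bits known zero, so any
+ // change the new constant makes to the top c2 bits of the sum is cleared
+ // again by the AND.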
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL(N0);
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, DL, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(LocReference, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Reduce bit extract of low half of an integer to the narrower type.
+ // (and (srl i64:x, K), KMask) ->
+ // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
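+ // i.e. when the extracted field lies entirely within the low half, the
+ // shift and mask can be done in the half-width type and zero-extended.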
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
+ if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ unsigned Size = VT.getSizeInBits();
+ const APInt &AndMask = CAnd->getAPIntValue();
+ unsigned ShiftBits = CShift->getZExtValue();
+ unsigned MaskBits = AndMask.countTrailingOnes();
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
+
+ if (APIntOps::isMask(AndMask) &&
+ // Required bits must not span the two halves of the integer and
+ // must fit in the half size type.
+ (ShiftBits + MaskBits <= Size / 2) &&
+ TLI.isNarrowingProfitable(VT, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
+ TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ TLI.isTruncateFree(VT, HalfVT) &&
+ TLI.isZExtFree(HalfVT, VT)) {
+ // The isNarrowingProfitable is to avoid regressions on PPC and
+ // AArch64 which match a few 64-bit bit insert / bit extract patterns
+ // on downstream users of this. Those patterns could probably be
+ // extended to handle extensions mixed in.
+
+ SDValue SL(N0);
+ assert(ShiftBits != 0 && MaskBits <= Size);
+
+ // Extracting the highest bit of the low half.
+ EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
+ N0.getOperand(0));
+
+ SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
+ SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
+ SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
+ SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
+ EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
+ bool &NarrowLoad) {
+ uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
+
+ if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ return false;
+
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ LoadedVT = LoadN->getMemoryVT();
+
+ if (ExtVT == LoadedVT &&
+ (!LegalOperations ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
+ // ZEXTLOAD will match without needing to change the size of the value being
+ // loaded.
+ NarrowLoad = false;
+ return true;
+ }
+
+ // Do not change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
+ return false;
+
+ NarrowLoad = true;
+ return true;
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ // do not return N0, because undef node may exist in N0
+ return DAG.getConstant(
+ APInt::getNullValue(
+ N0.getValueType().getScalarType().getSizeInBits()),
+ SDLoc(N), N0.getValueType());
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ // do not return N1, because undef node may exist in N1
+ return DAG.getConstant(
+ APInt::getNullValue(
+ N1.getValueType().getScalarType().getSizeInBits()),
+ SDLoc(N), N1.getValueType());
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
+ }
+
+ // fold (and c1, c2) -> c1&c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+ // fold (and x, -1) -> x
+ if (isAllOnesConstant(N1))
+ return N0;
+ // if (and x, c) is known to be zero, return 0
+ unsigned BitWidth = VT.getScalarType().getSizeInBits();
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(BitWidth)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+ // reassociate and
+ if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
+ return RAND;
+ // fold (and (or x, C), D) -> D if (C & D) == D
+ if (N1C && N0.getOpcode() == ISD::OR)
+ if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+ return N1;
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N0Op0 = N0.getOperand(0);
+ APInt Mask = ~N1C->getAPIntValue();
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
+ N0.getValueType(), N0Op0);
+
+ // Replace uses of the AND with uses of the Zero extend node.
+ CombineTo(N, Zext);
+
+ // We actually want to replace all uses of the any_extend with the
+ // zero_extend, to avoid duplicating things. This will later cause this
+ // AND to be folded.
+ CombineTo(N0.getNode(), Zext);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD &&
+ N0.getOperand(0).getResNo() == 0) ||
+ (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
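+ //
+ // For instance (illustrative values): with 8-bit lanes and a splat
+ // reported as 0x00FF over 16 bits (SplatBitSize == 16, BitWidth == 8),
+ // the lanes 0xFF and 0x00 are ANDed together, so 'Constant' becomes 0
+ // and the AND below is correctly kept.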
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+
+ // If the splat value has been compressed to a bitlength lower
+ // than the size of the vector lane, we need to re-expand it to
+ // the lane size.
+ if (BitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
+ SplatBitSize < BitWidth;
+ SplatBitSize = SplatBitSize * 2)
+ SplatValue |= SplatValue.shl(SplatBitSize);
+
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (SplatBitSize % BitWidth == 0) {
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
+ }
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getValueType(0),
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), SDLoc(Load),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ if (Load->getNumValues() == 3) {
+ // PRE/POST_INC loads have 3 values.
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
+ NewLoad.getValue(2) };
+ CombineTo(Load, To, 3, true);
+ } else {
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (and (load x), 255) -> (zextload x, i8)
+ // fold (and (extload x, i16), 255) -> (zextload x, i8)
+ // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+ if (N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+ LoadSDNode *LN0 = HasAnyExt
+ ? cast<LoadSDNode>(N0.getOperand(0))
+ : cast<LoadSDNode>(N0);
+ if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+ LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
+ auto NarrowLoad = false;
+ EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad)) {
+ if (!NarrowLoad) {
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
+ LN0->getChain(), LN0->getBasePtr(), ExtVT,
+ LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ } else {
+ EVT PtrType = LN0->getOperand(1).getValueType();
+
+ unsigned Alignment = LN0->getAlignment();
+ SDValue NewPtr = LN0->getBasePtr();
+
+ // For big endian targets, we need to add an offset to the pointer
+ // to load the correct bytes. For little endian systems, we merely
+ // need to read fewer bytes from the same pointer.
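+ // Concretely (an assumed example): narrowing an i32 load to an i8
+ // zextload on a big-endian target gives LVTStoreBytes == 4 and
+ // EVTStoreBytes == 1, so PtrOff == 3 and the byte is read from
+ // BasePtr + 3; a little-endian target keeps the original pointer.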
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+ unsigned EVTStoreBytes = ExtVT.getStoreSize();
+ unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+ SDLoc DL(LN0);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
+ NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
+ Alignment = MinAlign(Alignment, PtrOff);
+ }
+
+ AddToWorklist(NewPtr.getNode());
+
+ SDValue Load = DAG.getExtLoad(
+ ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo(), ExtVT, Alignment,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ AddToWorklist(N);
+ CombineTo(LN0, Load, Load.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+ }
+
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
+
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+ if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
+ return BSwap;
+ }
+
+ return SDValue();
+}
+
+/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
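+ /// For example (illustrative): with a == 0x00001234 in i32, the matched
+ /// ((a & 0xff) << 8) | ((a & 0xff00) >> 8) equals 0x00003412, and
+ /// (bswap a) >> 16 == 0x34120000 >> 16 == 0x00003412 as well.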
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword gets set to zero since the SRL
+ // 16 will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16) {
+ // If the left-shift isn't masked out then the only way this is a bswap is
+ // if all bits beyond the low 8 are 0. In that case the entire pattern
+ // reduces to a left shift anyway: leave it for other parts of the combiner.
+ if (!LookPassAnd0)
+ return SDValue();
+
+ // However, if the right shift isn't masked out then it might be because
+ // it's not needed. See if we can spot that too.
+ if (!LookPassAnd1 &&
+ !DAG.MaskedValueIsZero(
+ N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+ return SDValue();
+ }
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
+ return Res;
+}
+
+/// Return true if the specified node is an element that makes up a 32-bit
+/// packed halfword byteswap.
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// Match a 32-bit packed halfword bswap. That is
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
+/// => (rotl (bswap x), 16)
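+ /// As a sanity check (illustrative): for x == 0x11223344 the four masked
+ /// terms are 0x00004400, 0x00000033, 0x22000000 and 0x00110000, whose OR
+ /// is 0x22114433; bswap(x) == 0x44332211 and rotl(0x44332211, 16) is
+ /// likewise 0x22114433.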
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ SDNode *Parts[4] = {};
+
+ if (N1.getOpcode() == ISD::OR &&
+ N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
+ if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
+}
+
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.isUndef() || N1.isUndef())) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ SDLoc(LocReference), VT);
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
+ if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (N0.getValueType() == CCVT ||
+ (!LegalOperations && N0.getValueType() == MVT::i1))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
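+ // Illustration (assumed constants): with C1 == 0xF0 and C2 == 0x0F the
+ // result is (and (or X, Y), 0xFF), valid when X is known zero in the
+ // 0x0F bits and Y is known zero in the 0xF0 bits.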
+ if (const ConstantSDNode *N0O1C =
+ getAsNonOpaqueConstant(N0.getOperand(1))) {
+ if (const ConstantSDNode *N1O1C =
+ getAsNonOpaqueConstant(N1.getOperand(1))) {
+ const APInt &LHSMask = N0O1C->getAPIntValue();
+ const APInt &RHSMask = N1O1C->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ SDLoc DL(LocReference);
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
+ }
+ }
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ // do not return N0, because undef node may exist in N0
+ return DAG.getConstant(
+ APInt::getAllOnesValue(
+ N0.getValueType().getScalarType().getSizeInBits()),
+ SDLoc(N), N0.getValueType());
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ // do not return N1, because undef node may exist in N1
+ return DAG.getConstant(
+ APInt::getAllOnesValue(
+ N1.getValueType().getScalarType().getSizeInBits()),
+ SDLoc(N), N1.getValueType());
+
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
+ // Do this only if the resulting shuffle is legal.
+ if (isa<ShuffleVectorSDNode>(N0) &&
+ isa<ShuffleVectorSDNode>(N1) &&
+ // Avoid folding a node with illegal type.
+ TLI.isTypeLegal(VT)) {
+ bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
+ bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
+ bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+ bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
+ // Ensure both shuffles have a zero input.
+ if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
+ assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ bool CanFold = true;
+ int NumElts = VT.getVectorNumElements();
+ SmallVector<int, 4> Mask(NumElts);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Determine if either index is pointing to a zero vector.
+ bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
+ bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
+
+ // If one element is zero and the other side is undef, keep undef.
+ // This also handles the case that both are undef.
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
+ Mask[i] = -1;
+ continue;
+ }
+
+ // Make sure only one of the elements is zero.
+ if (M0Zero == M1Zero) {
+ CanFold = false;
+ break;
+ }
+
+ assert((M0 >= 0 || M1 >= 0) && "Undef index!");
+
+ // We have a zero and non-zero element. If the non-zero came from
+ // SV0 make the index a LHS index. If it came from SV1, make it
+ // a RHS index. We need to mod by NumElts because we don't care
+ // which operand it came from in the original shuffles.
+ Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
+ }
+
+ if (CanFold) {
+ SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
+ SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(NewLHS, NewRHS);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+ }
+ }
+ }
+ }
+
+ // fold (or c1, c2) -> c1|c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
+ // fold (or x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+ // fold (or x, -1) -> -1
+ if (isAllOnesConstant(N1))
+ return N1;
+ // fold (or x, c) -> c iff (x & ~c) == 0
+ if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+ return N1;
+
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
+ return BSwap;
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
+ return BSwap;
+
+ // reassociate or
+ if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
+ return ROR;
+ // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) == 0.
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ N1C, C1))
+ return DAG.getNode(
+ ISD::AND, SDLoc(N), VT,
+ DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
+ return SDValue();
+ }
+ }
+ // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
+
+ // See if this is some rotate idiom.
+ if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
+ return SDValue(Rot, 0);
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
+bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+ if (Op.getOpcode() == ISD::AND) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
+ Mask = Op.getOperand(1);
+ Op = Op.getOperand(0);
+ } else {
+ return false;
+ }
+ }
+
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+ Shift = Op;
+ return true;
+ }
+
+ return false;
+}
+
+// Return true if we can prove that, whenever Neg and Pos are both in the
+// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
+ // for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
+//
+// (or (shift1 X, Neg), (shift2 X, Pos))
+//
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg. The range [0, EltSize) means that we only need
+// to consider shift amounts with defined behavior.
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
+ // If EltSize is a power of 2 then:
+ //
+ // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
+ // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
+ //
+ // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
+ // for the stronger condition:
+ //
+ // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
+ //
+ // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
+ // we can just replace Neg with Neg' for the rest of the function.
+ //
+ // In other cases we check for the even stronger condition:
+ //
+ // Neg == EltSize - Pos [B]
+ //
+ // for all Neg and Pos. Note that the (or ...) then invokes undefined
+ // behavior if Pos == 0 (and consequently Neg == EltSize).
+ //
+ // We could actually use [A] whenever EltSize is a power of 2, but the
+ // only extra cases that it would match are those uninteresting ones
+ // where Neg and Pos are never in range at the same time. E.g. for
+ // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+ // as well as (sub 32, Pos), but:
+ //
+ // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
+ //
+ // always invokes undefined behavior for 32-bit X.
+ //
+ // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
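+ //
+ // A concrete instance (illustrative only): with EltSize == 32,
+ // Neg == (and (sub 0, Pos), 31) strips the mask (MaskLoBits == 5) and
+ // leaves NegC == 0 and NegOp1 == Pos, so Width == 0 and its low 5 bits
+ // are zero, i.e. the OR of the two shifts is a rotate. Likewise
+ // Neg == (sub 32, Pos) satisfies [B] directly with Width == 32.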
+ unsigned MaskLoBits = 0;
+ if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+ if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
+ if (NegC->getAPIntValue() == EltSize - 1) {
+ Neg = Neg.getOperand(0);
+ MaskLoBits = Log2_64(EltSize);
+ }
+ }
+ }
+
+ // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
+ if (Neg.getOpcode() != ISD::SUB)
+ return false;
+ ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
+ if (!NegC)
+ return false;
+ SDValue NegOp1 = Neg.getOperand(1);
+
+ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
+ // Pos'. The truncation is redundant for the purpose of the equality.
+ if (MaskLoBits && Pos.getOpcode() == ISD::AND)
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ if (PosC->getAPIntValue() == EltSize - 1)
+ Pos = Pos.getOperand(0);
+
+ // The condition we need is now:
+ //
+ // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
+ //
+ // If NegOp1 == Pos then we need:
+ //
+ // EltSize & Mask == NegC & Mask
+ //
+ // (because "x & Mask" is a truncation and distributes through subtraction).
+ APInt Width;
+ if (Pos == NegOp1)
+ Width = NegC->getAPIntValue();
+
+ // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
+ // Then the condition we want to prove becomes:
+ //
+ // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
+ //
+ // which, again because "x & Mask" is a truncation, becomes:
+ //
+ // NegC & Mask == (EltSize - PosC) & Mask
+ // EltSize & Mask == (NegC + PosC) & Mask
+ else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
+ if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
+ Width = PosC->getAPIntValue() + NegC->getAPIntValue();
+ else
+ return false;
+ } else
+ return false;
+
+ // Now we just need to check that EltSize & Mask == Width & Mask.
+ if (MaskLoBits)
+ // EltSize & Mask is 0 since Mask is EltSize - 1.
+ return Width.getLoBits(MaskLoBits) == 0;
+ return Width == EltSize;
+}
+
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
+// former being preferred if supported. InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+ SDValue Neg, SDValue InnerPos,
+ SDValue InnerNeg, unsigned PosOpcode,
+ unsigned NegOpcode, const SDLoc &DL) {
+ // fold (or (shl x, (*ext y)),
+ // (srl x, (*ext (sub 32, y)))) ->
+ // (rotl x, y) or (rotr x, (sub 32, y))
+ //
+ // fold (or (shl x, (*ext (sub 32, y))),
+ // (srl x, (*ext y))) ->
+ // (rotr x, y) or (rotl x, (sub 32, y))
+ EVT VT = Shifted.getValueType();
+ if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
+ bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
+ return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
+ HasPos ? Pos : Neg).getNode();
+ }
+
+ return nullptr;
+}
+
+// MatchRotate - Handle an 'or' of two operands. If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions, generate
+// a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+ // Must be a legal type. Expanded 'n promoted things won't work with rotates.
+ EVT VT = LHS.getValueType();
+ if (!TLI.isTypeLegal(VT)) return nullptr;
+
+ // The target must have at least one rotate flavor.
+ bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+ bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+ if (!HasROTL && !HasROTR) return nullptr;
+
+ // Match "(X shl/srl V1) & V2" where V2 may not be present.
+ SDValue LHSShift; // The shift.
+ SDValue LHSMask; // AND value if any.
+ if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+ return nullptr; // Not part of a rotate.
+
+ SDValue RHSShift; // The shift.
+ SDValue RHSMask; // AND value if any.
+ if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+ return nullptr; // Not part of a rotate.
+
+ if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+ return nullptr; // Not shifting the same value.
+
+ if (LHSShift.getOpcode() == RHSShift.getOpcode())
+ return nullptr; // Shifts must disagree.
+
+ // Canonicalize shl to left side in a shl/srl pair.
+ if (RHSShift.getOpcode() == ISD::SHL) {
+ std::swap(LHS, RHS);
+ std::swap(LHSShift, RHSShift);
+ std::swap(LHSMask, RHSMask);
+ }
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDValue LHSShiftArg = LHSShift.getOperand(0);
+ SDValue LHSShiftAmt = LHSShift.getOperand(1);
+ SDValue RHSShiftArg = RHSShift.getOperand(0);
+ SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
+ uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
+ uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
+ if ((LShVal + RShVal) != EltSizeInBits)
+ return nullptr;
+
+ SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+ LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
+
+ // If there is an AND of either shifted operand, apply it to the result.
+ if (LHSMask.getNode() || RHSMask.getNode()) {
+ APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
+ SDValue Mask = DAG.getConstant(AllBits, DL, VT);
+
+ if (LHSMask.getNode()) {
+ APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, LHSMask,
+ DAG.getConstant(RHSBits, DL, VT)));
+ }
+ if (RHSMask.getNode()) {
+ APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
+ Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
+ DAG.getNode(ISD::OR, DL, VT, RHSMask,
+ DAG.getConstant(LHSBits, DL, VT)));
+ }
+
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
+ }
+
+ return Rot.getNode();
+ }
+
+ // If there is a mask here, and we have a variable shift, we can't be sure
+ // that we're masking out the right stuff.
+ if (LHSMask.getNode() || RHSMask.getNode())
+ return nullptr;
+
+ // If the shift amount is sign/zext/any-extended just peel it off.
+ SDValue LExtOp0 = LHSShiftAmt;
+ SDValue RExtOp0 = RHSShiftAmt;
+ if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+ (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
+ RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+ LExtOp0 = LHSShiftAmt.getOperand(0);
+ RExtOp0 = RHSShiftAmt.getOperand(0);
+ }
+
+ SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ if (TryL)
+ return TryL;
+
+ SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ if (TryR)
+ return TryR;
+
+ return nullptr;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // fold vector ops
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+ }
+
+ // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
+ if (N0.isUndef() && N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+ // fold (xor x, undef) -> undef
+ if (N0.isUndef())
+ return N0;
+ if (N1.isUndef())
+ return N1;
+ // fold (xor c1, c2) -> c1^c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
+ // canonicalize constant to RHS
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ // fold (xor x, 0) -> x
+ if (isNullConstant(N1))
+ return N0;
+ // reassociate xor
+ if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
+ return RXOR;
+
+ // fold !(x cc y) -> (x !cc y)
+ SDValue LHS, RHS, CC;
+ if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+ bool isInt = LHS.getValueType().isInteger();
+ ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ isInt);
+
+ if (!LegalOperations ||
+ TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
+ switch (N0.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled SetCC Equivalent!");
+ case ISD::SETCC:
+ return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
+ case ISD::SELECT_CC:
+ return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
+ N0.getOperand(3), NotCC);
+ }
+ }
+ }
+
+ // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+ if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getNode()->hasOneUse() &&
+ isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+ SDValue V = N0.getOperand(0);
+ SDLoc DL(N0);
+ V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
+ DAG.getConstant(1, DL, V.getValueType()));
+ AddToWorklist(V.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
+ }
+
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+ if (isOneConstant(N1) && VT == MVT::i1 &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
+ AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
+ return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ }
+ }
+ // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+ if (isAllOnesConstant(N1) &&
+ (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+ LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
+ RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
+ AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
+ return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
+ }
+ }
+ // fold (xor (and x, y), y) -> (and (not x), y)
+ if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ N0->getOperand(1) == N1) {
+ SDValue X = N0->getOperand(0);
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
+ AddToWorklist(NotX.getNode());
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
+ }
+ // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+ if (N1C && N0.getOpcode() == ISD::XOR) {
+ if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N00C->getAPIntValue(), DL, VT));
+ }
+ if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getConstant(N1C->getAPIntValue() ^
+ N01C->getAPIntValue(), DL, VT));
+ }
+ }
+ // fold (xor x, x) -> 0
+ if (N0 == N1)
+ return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There exists no value for x which would allow the result to contain zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
+ && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
+ N0.getOperand(1));
+ }
+
+ // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
+
+ // Simplify the expression using non-local knowledge.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// Handle transforms common to the three shifts, when the shift amount is a
+/// constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+ SDNode *LHS = N->getOperand(0).getNode();
+ if (!LHS->hasOneUse()) return SDValue();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
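+ //
+ // A hypothetical instance of the canonicalization this enables:
+ // (shl (and (srl x, 4), 0xF0), 2) -> (and (shl (srl x, 4), 2), 0x3C0),
+ // i.e. the AND mask is shifted along with the value.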
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDValue();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDValue(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant and not opaque as well.
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
+
+ // FIXME: disable this unless the input to the binop is a shift by a constant.
+ // If it is not a shift, it pessimizes some common cases like:
+ //
+ // void foo(int *X, int i) { X[i & 1235] = 1; }
+ // int bar(int *X, int i) { return X[i & 255]; }
+ SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+ if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+ BinOpLHSVal->getOpcode() != ISD::SRA &&
+ BinOpLHSVal->getOpcode() != ISD::SRL) ||
+ !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified by the
+ // logical operation, do not perform the transformation. The highBitSet
+ // boolean indicates the value of the high bit of the constant which would
+ // cause it to be modified for this operation.
+ if (N->getOpcode() == ISD::SRA) {
+ bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+ if (BinOpRHSSignSet != HighBitSet)
+ return SDValue();
+ }
+
+ if (!TLI.isDesirableToCommuteWithShift(LHS))
+ return SDValue();
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
+ N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+ assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
+
+ // Create the new shift.
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ SDLoc(LHS->getOperand(0)),
+ VT, LHS->getOperand(0), N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
+ if (!N01C->isOpaque()) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ SDLoc DL(N);
+
+ return DAG.getNode(ISD::AND, DL, TruncVT,
+ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
+ DAG.getConstant(TruncC, DL, TruncVT));
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+ // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+ if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 =
+ distributeTruncateThroughAnd(N->getOperand(1).getNode()))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), NewOp1);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+ // If setcc produces all-one true value then:
+ // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
+ if (N1CV && N1CV->isConstant()) {
+ if (N0.getOpcode() == ISD::AND) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
+ N01CV, N1CV))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ }
+ } else {
+ N1C = isConstOrConstSplat(N1);
+ }
+ }
+ }
+
+ // fold (shl c1, c2) -> c1<<c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
+ // fold (shl 0, x) -> 0
+ if (isNullConstant(N0))
+ return N0;
+ // fold (shl x, c >= size(x)) -> undef
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ return DAG.getUNDEF(VT);
+ // fold (shl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+ // if (shl x, c) is known to be zero, return 0
+ if (DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+ // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
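+ //
+ // For instance (illustrative): (shl (zext (shl i16:x, 4) to i32), 20)
+ // has c2 == 20 >= 32 - 16, so it becomes (shl (zext i16:x to i32), 24);
+ // every bit the inner i16 shift discarded would have been shifted out
+ // of the i32 result anyway.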
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0Op0.getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ SDLoc DL(N0);
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT,
+ N0Op0->getOperand(0)),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+ }
+ }
+ }
+
+ // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
+ // Only fold this if the inner zext has no other uses to avoid increasing
+ // the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ if (c1 < VT.getScalarSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 == c2) {
+ SDValue NewOp0 = N0.getOperand(0);
+ EVT CountVT = NewOp0.getOperand(1).getValueType();
+ SDLoc DL(N);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
+ NewOp0,
+ DAG.getConstant(c2, DL, CountVT));
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
+ }
+ }
+ }
+
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
+ if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
+ cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t C1 = N0C1->getZExtValue();
+ uint64_t C2 = N1C->getZExtValue();
+ SDLoc DL(N);
+ if (C1 <= C2)
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
+ DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+ }
+ }
+
+ // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+ // (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
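+ //
+ // A worked case (illustrative): (shl (srl i32:x, 4), 6) has c2 > c1, so
+ // Mask == 0xFFFFFFF0 shifted left by 2 gives 0xFFFFFFC0 and the result
+ // is (and (shl x, 2), 0xFFFFFFC0).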
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ if (c1 < OpSizeInBits) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2 - c1);
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1 - c2);
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, N1.getValueType()));
+ }
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+ }
+ // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+ if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ unsigned BitSize = VT.getScalarSizeInBits();
+ SDLoc DL(N);
+ SDValue HiBitsMask =
+ DAG.getConstant(APInt::getHighBitsSet(BitSize,
+ BitSize - N1C->getZExtValue()),
+ DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
+ HiBitsMask);
+ }
+
+ // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Variant of version done on multiply, except mul by a power of 2 is turned
+ // into a shift.
+ APInt Val;
+ if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ (isa<ConstantSDNode>(N0.getOperand(1)) ||
+ ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+ SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+ }
+
+ // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
+ if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ if (SDValue Folded =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
+ }
+ }
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ return NewSHL;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
+ }
+
+ // fold (sra c1, c2) -> c1>>c2 (arithmetic shift)
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
+ // fold (sra 0, x) -> 0
+ if (isNullConstant(N0))
+ return N0;
+ // fold (sra -1, x) -> -1
+ if (isAllOnesConstant(N0))
+ return N0;
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (sra x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+ // sext_inreg.
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+ if (VT.isVector())
+ ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ ExtVT, VT.getVectorNumElements());
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ N0.getOperand(0), DAG.getValueType(ExtVT));
+ }
+
+ // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
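+ // e.g. (sra (sra x, 3), 5) -> (sra x, 8); the sum is clamped to bitwidth - 1.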
+ if (N1C && N0.getOpcode() == ISD::SRA) {
+ if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+ if (Sum >= OpSizeInBits)
+ Sum = OpSizeInBits - 1;
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum, DL, N1.getValueType()));
+ }
+ }
+
+ // fold (sra (shl X, m), (sub result_size, n))
+ // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
+ // result_size - n != m.
+ // If truncate is free for the target, sext(shl) is likely to result in better
+ // code.
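+ // e.g. for i32 with m = 8 and a shift amount of 24 (n = 8), assuming the i8
+ // truncate is free: (sra (shl X, 8), 24) -> (sign_extend (trunc i8 (srl X, 16)))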
+ if (N0.getOpcode() == ISD::SHL && N1C) {
+ // Get the two constants of the shifts: N01C = m and N1C = result_size - n.
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
+ // Determine what the truncate's result bitsize and type would be.
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // Determine the residual right-shift amount.
+ int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+ // If the shift is not a no-op (in which case this should be just a sign
+ // extend already), sign_extend is legal on the truncated type, and the
+ // truncate to that type is both legal and free, perform the transform.
+ if ((ShiftAmt > 0) &&
+ TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+ TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+ TLI.isTruncateFree(VT, TruncVT)) {
+
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
+ }
+ }
+ }
+
+ // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
+ // if c1 is equal to the number of bits the trunc removes
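+ // e.g. (sra (trunc i64->i32 (srl x, 32)), 5) -> (trunc i64->i32 (sra x, 37))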
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ unsigned LargeShiftVal = LargeShift->getZExtValue();
+ EVT LargeVT = N0Op0.getValueType();
+
+ if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDLoc DL(N);
+ SDValue Amt =
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
+ getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
+ N0Op0.getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
+ }
+ }
+ }
+
+ // Simplify, based on bits shifted out of the LHS.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // If the sign bit is known to be zero, switch this to a SRL.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N, N1C))
+ return NewSRA;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold vector ops
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
+ }
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (isNullConstant(N0))
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
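+ // e.g. for i32: (srl (srl x, 3), 5) -> (srl x, 8), and any combination whose
+ // sum reaches 32 or more folds to 0.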
+ if (N1C && N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N01C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+ }
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
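+ // e.g. for i32, c = 5: (srl (shl x, 5), 5) -> (and x, 0x07FFFFFF)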
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
+ unsigned BitSize = N0.getScalarValueSizeInBits();
+ if (BitSize <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, DL, VT));
+ }
+ }
+
+ // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
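+ // e.g. (srl (any_extend i8->i32 x), 2)
+ //   -> (and (any_extend i8->i32 (srl x, 2)), 0x3FFFFFFF)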
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getZExtValue() >= BitSize)
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ uint64_t ShiftAmt = N1C->getZExtValue();
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
+ N0.getOperand(0),
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
+ AddToWorklist(SmallShift.getNode());
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
+ if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
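+ // e.g. for i32, (srl (ctlz x), 5) is 1 iff x == 0; if only a single bit of x,
+ // say bit k, can be nonzero, this becomes (xor (srl x, k), 1).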
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero;
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
+ AddToWorklist(Op.getNode());
+ }
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRL = visitShiftByConstant(N, N1C))
+ return NewSRL;
+
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of SRL is optimized into AND, the SRL
+ // itself may not be optimized further. Look for it and add the BRCOND to
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorklist(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorklist(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBSWAP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (bswap c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
+ // fold (bswap (bswap x)) -> x
+ if (N0.getOpcode() == ISD::BSWAP)
+ return N0->getOperand(0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (bitreverse (bitreverse x)) -> x
+ if (N0.getOpcode() == ISD::BITREVERSE)
+ return N0.getOperand(0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
+ return SDValue();
+}
+
+
+/// \brief Generate Min/Max node
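+/// e.g. (select (setolt LHS, RHS), LHS, RHS) -> (fminnum LHS, RHS) when
+/// FMINNUM is legal for the value type.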
+static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ ISD::CondCode CC, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ switch (CC) {
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
+ if (TLI.isOperationLegal(Opcode, VT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: {
+ unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
+ if (TLI.isOperationLegal(Opcode, VT))
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS);
+ return SDValue();
+ }
+ default:
+ return SDValue();
+ }
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
+ // fold (select true, X, Y) -> X
+ // fold (select false, X, Y) -> Y
+ return !N0C->isNullValue() ? N1 : N2;
+ }
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && isOneConstant(N1))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ // We can't do this reliably if integer-based booleans have different contents
+ // from floating-point-based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 || (VT0.isInteger() &&
+ TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(false, true) &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
+ isNullConstant(N1) && isOneConstant(N2)) {
+ SDValue XORNode;
+ if (VT == VT0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT0,
+ N0, DAG.getConstant(1, DL, VT0));
+ }
+ SDLoc DL0(N0);
+ XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
+ N0, DAG.getConstant(1, DL0, VT0));
+ AddToWorklist(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
+ SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
+ AddToWorklist(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
+ SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
+ AddToWorklist(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && isNullConstant(N2))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ if (VT0 == MVT::i1) {
+ // The code in this block deals with the following 2 equivalences:
+ // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
+ // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
+ // The target can specify its preferred form with the
+ // shouldNormalizeToSelectSequence() callback. However, we always transform
+ // to the right-hand form if the inner select already exists in the DAG, and
+ // we always transform to the left-hand form if we know that we can further
+ // optimize the combination of the conditions.
+ bool normalizeToSequence
+ = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
+ // Create the actual and node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
+ // Create the actual or node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // select x, y (fcmp lt x, y) -> fminnum x, y
+ // select x, y (fcmp gt x, y) -> fmaxnum x, y
+ //
+ // This is OK if we don't care about what happens if either operand is a
+ // NaN.
+ //
+
+ // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
+ // no signed zeros as well as no nans.
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (Options.UnsafeFPMath &&
+ VT.isFloatingPoint() && N0.hasOneUse() &&
+ DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+
+ if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1), N1, N2, CC,
+ TLI, DAG))
+ return FMinMax;
+ }
+
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(SDLoc(N), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
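+// Split a vector SETCC node into low and high halves: both compare operands
+// are split and the original condition code is reused for each half.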
+static
+std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the inputs.
+ SDValue Lo, Hi, LL, LH, RL, RH;
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+ return std::make_pair(Lo, Hi);
+}
+
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
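+// e.g. (vselect <0,0,0,0,-1,-1,-1,-1>,
+//               (concat_vectors A, B), (concat_vectors C, D))
+//   -> (concat_vectors C, B)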
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
+ // binary ones here.
+ if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
+ return SDValue();
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Skip BV elements until we find one that's not an UNDEF. Then keep looping
+ // until we get to half the length of the BV and check that all of the
+ // non-undef elements are the same.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->isUndef())
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->isUndef())
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
+SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Data = MSC->getValue();
+ SDLoc DL(N);
+
+ // If the MSCATTER data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
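+ // For example, on a target where the data type must be split (say v16i32),
+ // a scatter whose mask comes from a SETCC is rewritten here as two v8i32
+ // scatters joined by a TokenFactor.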
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
+
+ SDValue Chain = MSC->getChain();
+
+ EVT MemoryVT = MSC->getMemoryVT();
+ unsigned Alignment = MSC->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MSC->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MSC->getAAInfo(), MSC->getRanges());
+
+ SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO);
+
+ SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
+ Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ SDValue Data = MST->getValue();
+ SDLoc DL(N);
+
+ // If the MSTORE data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (Mask.getOpcode() == ISD::SETCC) {
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
+
+ SDValue Chain = MST->getChain();
+ SDValue Ptr = MST->getBasePtr();
+
+ EVT MemoryVT = MST->getMemoryVT();
+ unsigned Alignment = MST->getOriginalAlignment();
+
+ // If Alignment is equal to the vector size,
+ // use half of it for the second part.
+ unsigned SecondHalfAlignment =
+ (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MST->getAAInfo(), MST->getRanges());
+
+ Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ MST->isTruncatingStore());
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MST->getAAInfo(),
+ MST->getRanges());
+
+ Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ MST->isTruncatingStore());
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMGATHER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDLoc DL(N);
+
+ // If the MGATHER result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MGT->getValue();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Chain = MGT->getChain();
+ EVT MemoryVT = MGT->getMemoryVT();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
+ MMO);
+
+ SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
+ MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
+
+ SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { GatherRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+}
+
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+ SDValue Mask = MLD->getMask();
+ SDLoc DL(N);
+
+ // If the MLOAD result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+ if (Mask.getOpcode() == ISD::SETCC) {
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MLD->getSrc0();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Chain = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ EVT MemoryVT = MLD->getMemoryVT();
+ unsigned Alignment = MLD->getOriginalAlignment();
+
+ // If Alignment is equal to the vector size,
+ // use half of it for the second part.
+ unsigned SecondHalfAlignment =
+ (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ISD::NON_EXTLOAD);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ISD::NON_EXTLOAD);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
+
+ SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { LoadRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDLoc DL(N);
+
+ // Canonicalize integer abs.
+ // vselect (setg[te] X, 0), X, -X ->
+ // vselect (setgt X, -1), X, -X ->
+ // vselect (setl[te] X, 0), -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
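+ // e.g. for v4i32: (vselect (setgt X, zero), X, (sub zero, X))
+ //   -> (xor (add X, (sra X, 31)), (sra X, 31))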
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ bool isAbs = false;
+ bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+ N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+ else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+ N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+ if (isAbs) {
+ EVT VT = LHS.getValueType();
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+ }
+ }
+
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // If the VSELECT result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (N0.getOpcode() == ISD::SETCC) {
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
+ std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
+
+ // Add the new VSELECT nodes to the work list in case they need to be split
+ // again.
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+ }
+
+ // Fold (vselect (build_vector all_ones), N1, N2) -> N1
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N2;
+
+ // The ConvertSelectToConcatVector function is assuming both the above
+ // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
+ return CV;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
+ CC, SDLoc(N), false)) {
+ AddToWorklist(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ } else if (SCC->isUndef()) {
+ // When the condition is UNDEF, just return the first operand. This is
+ // consistent with DAG creation: no setcc node is created in this case.
+ return N2;
+ } else if (SCC.getOpcode() == ISD::SETCC) {
+ // Fold to a simpler select_cc
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+ }
+ }
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ SDLoc(N));
+}
+
+SDValue DAGCombiner::visitSETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+
+ // If Carry is false, fold to a regular SETCC.
+ if (Carry.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
+
+ return SDValue();
+}
+
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
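+/// e.g. (sext (i8 -1)) -> (i32 -1), and likewise element-wise for a
+/// build_vector of constants.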
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes,
+ bool LegalOperations) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
+ && "Expected EXTEND dag node in input!");
+
+ // fold (sext c1) -> c1
+ // fold (zext c1) -> c1
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+
+ // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
+ // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
+ EVT SVT = VT.getScalarType();
+ if (!(VT.isVector() &&
+ (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+ return nullptr;
+
+ // We can fold this node into a build_vector.
+ unsigned VTBits = SVT.getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = VT.getVectorNumElements();
+ SDLoc DL(N);
+
+ for (unsigned i=0; i != NumElts; ++i) {
+ SDValue Op = N0->getOperand(i);
+ if (Op->isUndef()) {
+ Elts.push_back(DAG.getUNDEF(SVT));
+ continue;
+ }
+
+ SDLoc DL(Op);
+ // Get the constant value and if needed trunc it to the size of the type.
+ // Nodes like build_vector might have constants wider than the scalar type.
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
+ else
+ Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
+ }
+
+ return DAG.getBuildVector(VT, DL, Elts).getNode();
+}
+
+// ExtendUsesToFormExtLoad - Try to extend uses of a load to enable the
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if the extensions are possible and the
+// above-mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVectorImpl<SDNode *> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+ SDValue Trunc, SDValue ExtLoad,
+ const SDLoc &DL, ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+ }
+}
+
+// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
+SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ assert((N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) &&
+ "Unexpected node type (not an extend)!");
+
+ // fold (sext (load x)) to multiple smaller sextloads; same for zext.
+ // For example, on a target with legal v4i32, but illegal v8i32, turn:
+ // (v8i32 (sext (v8i16 (load x))))
+ // into:
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))
+ // Where uses of the original load, i.e.:
+ // (v8i16 (load x))
+ // are replaced with:
+ // (v8i16 (truncate
+ // (v8i32 (concat_vectors (v4i32 (sextload x)),
+ // (v4i32 (sextload (x + 16)))))))
+ //
+ // This combine is only applicable to illegal, but splittable, vectors.
+ // All legal types, and illegal non-vector types, are handled elsewhere.
+ // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
+ //
+ if (N0->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
+ !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
+ !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SmallVector<SDNode *, 4> SetCCs;
+ if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
+ return SDValue();
+
+ ISD::LoadExtType ExtType =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+
+ // Try to split the vector types to get down to legal types.
+ EVT SplitSrcVT = SrcVT;
+ EVT SplitDstVT = DstVT;
+ while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
+ SplitSrcVT.getVectorNumElements() > 1) {
+ SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
+ SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
+ }
+
+ if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ const unsigned NumSplits =
+ DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
+ const unsigned Stride = SplitSrcVT.getStoreSize();
+ SmallVector<SDValue, 4> Loads;
+ SmallVector<SDValue, 4> Chains;
+
+ SDValue BasePtr = LN0->getBasePtr();
+ for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
+ const unsigned Offset = Idx * Stride;
+ const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+
+ SDValue SplitLoad = DAG.getExtLoad(
+ ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
+ LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+
+ Loads.push_back(SplitLoad.getValue(0));
+ Chains.push_back(SplitLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+
+ CombineTo(N, NewValue);
+
+ // Replace uses of the original load (before extension)
+ // with a truncate of the concatenated sextloaded vectors.
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
+ CombineTo(N0.getNode(), Trunc, NewChain);
+ ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
+ (ISD::NodeType)N->getOpcode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already sign extended as needed.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (load x)) to multiple smaller sextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), MemVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
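+ // e.g. (sext i16->i32 (and (load x), 0xff00))
+ //   -> (and (sextload i16->i32 x), 0xFFFFFF00)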
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemoryVT(),
+ LN0->getMemOperand());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ SDLoc(N0.getOperand(0)),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations &&
+ TLI.getBooleanContents(N0VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
+ return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If the desired elements are smaller or larger than the source
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign-extend.
+ EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDLoc DL(N);
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), NegOne,
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
+
+ if (!VT.isVector()) {
+ EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
+ SDLoc DL(N);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
+ N0.getOperand(0), N0.getOperand(1), CC);
+ return DAG.getSelect(DL, VT, SetCC,
+ NegOne, DAG.getConstant(0, DL, VT));
+ }
+ }
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero in KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// computeKnownBits in the caller.
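+// It also treats (setcc x, 0, ne), where x is known to be either 0 or 1, as a
+// truncate of x to i1.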
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ if (isNullConstant(Op0))
+ Op = Op1;
+ else if (isNullConstant(Op1))
+ Op = Op0;
+ else
+ return false;
+
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+
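+ // Only treat the setcc as a truncate if every bit of Op other than bit 0 is
+ // known to be zero; then (setne Op, 0) is equivalent to truncating Op to i1.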
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
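+ // TruncatedBits are the bits of Op that the truncate discards but that still
+ // fall within the width of the final result; the fold is only safe when they
+ // are all known to be zero.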
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+
+ return Op;
+ }
+ }
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode *oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ EVT MinVT = N0.getValueType();
+
+ // Try to mask before the extension to avoid having to generate a larger mask,
+ // possibly over several sub-vectors.
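+ // For illustration: with x of type i16, (zext i32 (trunc i8 x)) becomes
+ // (zext i32 (and i16 x, 255)).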
+ if (SrcVT.bitsLT(VT)) {
+ if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
+ SDValue Op = N0.getOperand(0);
+ Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ AddToWorklist(Op.getNode());
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ }
+ }
+
+ if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
+ SDValue Op = N0.getOperand(0);
+ if (SrcVT.bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ } else if (SrcVT.bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ AddToWorklist(Op.getNode());
+ }
+ return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ }
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ // Only generate vector extloads when 1) they're legal, and 2) they are
+ // deemed desirable by the target.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ ((!LegalOperations && !VT.isVector() &&
+ !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (VT.isVector())
+ DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (load x)) to multiple smaller zextloads.
+ // Only on illegal but splittable vectors.
+ if (SDValue ExtLoad = CombineExtLoad(N))
+ return ExtLoad;
+
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ // Unless (and (load x) cst) will match as a zextload already and has
+ // additional users.
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::AND) {
+ auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
+ auto NarrowLoad = false;
+ EVT LoadResultTy = AndC->getValueType(0);
+ EVT ExtVT, LoadedVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
+ NarrowLoad))
+ DoXform = false;
+ }
+ if (DoXform)
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
+ ISD::ZERO_EXTEND, SetCCs, TLI);
+ }
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemoryVT(),
+ LN0->getMemOperand());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ SDLoc(N0.getOperand(0)),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), MemVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // Only do this before legalize for now.
+ if (!LegalOperations && VT.isVector() &&
+ N0.getValueType().getVectorElementType() == MVT::i1) {
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+ // We know that the # elements of the result is the same as the #
+ // elements of the compare (and the # elements of the compare result for
+ // that matter). Check to see that they are the same size. If so, we know
+ // that the element size of the zext'd result matches the element size of
+ // the compare operands.
+ SDLoc DL(N);
+ SDValue VecOnes = DAG.getConstant(1, DL, VT);
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
+ }
+
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingElementType = EVT::getIntegerVT(
+ *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType = EVT::getVectorVT(
+ *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1), N0.getOperand(2));
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ VecOnes);
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
+ }
+
+ // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+ // (zext (srl (zext x), cst)) -> (srl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorklist(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+ return TruncOp; // x iff x size == zext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ISD::ANY_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ ISD::LoadExtType ExtType = LN0->getExtensionType();
+ EVT MemVT = LN0->getMemoryVT();
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // For vectors:
+ // aext(setcc) -> vsetcc
+ // aext(setcc) -> truncate(vsetcc)
+ // aext(setcc) -> aext(vsetcc)
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the extended result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/any extend
+ else {
+ EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
+ SDValue VsetCC =
+ DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
+ if (SDValue SCC = SimplifySelectCC(
+ DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
+ DAG.getConstant(0, DL, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used. If so, return the simpler operand,
+/// otherwise return a null SDValue.
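+/// For example, given V = (or (shl x, 8), y) and Mask = 0xff, the shifted
+/// operand contributes no demanded bits, so y is returned.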
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal)
+ return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the or, drop them.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+ return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// If the result of a wider load is shifted right by N bits and then truncated
+/// to a narrower type, where N is a multiple of the number of bits in the
+/// narrower type, transform it to a narrower load from address + N / (number
+/// of bits in the new type). If the result is to be extended, also fold the
+/// extension to form an extending load.
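+/// For illustration (assuming a little-endian, byte-addressable target):
+///   (i8 (trunc (srl (i32 (load p)), 16))) -> (i8 (load p + 2))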
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // sign-extending back to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special-case: SRL is basically zero-extending a narrower value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
+ }
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
+ return SDValue();
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+ // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // Because an SRL must be assumed to *need* to zero-extend the high bits
+ // (as opposed to anyext the high bits), we can't combine the zextload
+ // lowering of SRL and an sextload.
+ if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+ return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it. Don't transform one with
+ // multiple uses; this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ // Don't change the width of a volatile load.
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (LN0->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LN0->getNumValues() > 2)
+ return SDValue();
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
+ LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
+ return SDValue();
+
+ if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
+ return SDValue();
+
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ // It's not possible to generate a constant of extended or untyped type.
+ return SDValue();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
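+ // E.g. narrowing a big-endian i32 load to its low i8 must load from byte
+ // offset 3 rather than offset 0.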
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDLoc DL(LN0);
+ // The original load itself didn't wrap, so an offset within it doesn't.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, DL, PtrType),
+ &Flags);
+ AddToWorklist(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ else
+ Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
+ NewAlign, LN0->getMemOperand()->getFlags(),
+ LN0->getAAInfo());
+
+ // Replace the old load's chain with the new load's chain.
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ // If the shift amount is as large as the result size (but, presumably,
+ // no larger than the source) then the useful bits of the result are
+ // zero; we can't simply return the shortened shift, because the result
+ // of that operation is undefined.
+ SDLoc DL(N0);
+ if (ShLeftAmt >= VT.getSizeInBits())
+ Result = DAG.getConstant(0, DL, VT);
+ else
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ // fold (sext_in_reg c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ N0.getOperand(0), N1);
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
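+ // (The in-reg sign bit is bit EVTBits-1; if it is known to be zero, sign-
+ // and zero-extension produce the same value.)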
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), EVT,
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false))
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
+ BSwap, N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ // if the source is smaller than the dest, we still need an extend.
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ // if the source is larger than the dest, then we just need the truncate.
+ if (N0.getOperand(0).getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // Fold extract-and-trunc into a narrow extract. For example:
+ // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+ // i32 y = TRUNCATE(i64 x)
+ // -- becomes --
+ // v16i8 b = BITCAST (v2i64 val)
+ // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDLoc DL(N);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
+ DAG.getBitcast(NVT, N0.getOperand(0)),
+ DAG.getConstant(Index, DL, IndexTy));
+ }
+ }
+
+ // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+ if (N0.getOpcode() == ISD::SELECT) {
+ EVT SrcVT = N0.getValueType();
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+ TLI.isTruncateFree(SrcVT, VT)) {
+ SDLoc SL(N0);
+ SDValue Cond = N0.getOperand(0);
+ SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ }
+ }
+
+ // trunc (shl x, K) -> shl (trunc x), K, when K < VT.getSizeInBits() / 2
+ if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+ TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
+ if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t Amt = CAmt->getZExtValue();
+ unsigned Size = VT.getSizeInBits();
+
+ if (Amt < Size / 2) {
+ SDLoc SL(N);
+ EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SHL, SL, VT, Trunc,
+ DAG.getConstant(Amt, SL, AmtVT));
+ }
+ }
+ }
+
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ }
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" // -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
+ if (!VT.isVector()) {
+ if (SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits())))
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
+ }
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ if (SDValue Reduced = ReduceLoadWidth(N))
+ return Reduced;
+
+ // Handle the case where the load remains an extending load even
+ // after truncation.
+ if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ if (!LN0->isVolatile() &&
+ LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+ SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemoryVT(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
+ return NewLoad;
+ }
+ }
+ }
+ // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (!X.isUndef()) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+ // Stop if more than one member is non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
+ AddToWorklist(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
+ }
+ }
+
+ // Fold truncate of a bitcast of a vector to an extract of the low vector
+ // element.
+ //
+ // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
+ if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
+ SDValue VecSrc = N0.getOperand(0);
+ EVT SrcVT = VecSrc.getValueType();
+ if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+ SDLoc SL(N);
+
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
+ VecSrc, DAG.getConstant(0, SL, IdxVT));
+ }
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getAddressSpace() != LD2->getAddressSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+ unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
+ if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), Align);
+ }
+
+ return SDValue();
+}
+
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+ // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+ // and Lo parts; on big-endian machines it doesn't.
+ return DAG.getDataLayout().isBigEndian() ? 1 : 0;
+}
+
+static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ // If this is not a bitcast to an FP type or if the target doesn't have
+ // IEEE754-compliant FP logic, we're done.
+ EVT VT = N->getValueType(0);
+ if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+ return SDValue();
+
+ // TODO: Use splat values for the constant-checking below and remove this
+ // restriction.
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+ if (SourceVT.isVector())
+ return SDValue();
+
+ unsigned FPOpcode;
+ APInt SignMask;
+ switch (N0.getOpcode()) {
+ case ISD::AND:
+ FPOpcode = ISD::FABS;
+ SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
+ break;
+ case ISD::XOR:
+ FPOpcode = ISD::FNEG;
+ SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
+ break;
+ // TODO: ISD::OR --> ISD::FNABS?
+ default:
+ return SDValue();
+ }
+
+ // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
+ // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+ SDValue LogicOp0 = N0.getOperand(0);
+ ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
+ LogicOp0.getOpcode() == ISD::BITCAST &&
+ LogicOp0->getOperand(0).getValueType() == VT)
+ return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation will
+ // be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT)))
+ return DAG.getBitcast(VT, N0);
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getBitcast(VT, N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned OrigAlign = LN0->getAlignment();
+
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ LN0->getAddressSpace(), OrigAlign, &Fast) &&
+ Fast) {
+ SDValue Load =
+ DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), OrigAlign,
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+
+ if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
+ return V;
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ //
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // This often reduces constant pool loads.
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() &&
+ !VT.isVector() && !N0.getValueType().isVector()) {
+ SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
+ AddToWorklist(NewConv.getNode());
+
+ SDLoc DL(N);
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ assert(VT.getSizeInBits() == 128);
+ SDValue SignBit = DAG.getConstant(
+ APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+ SDValue FlipBit;
+ if (N0.getOpcode() == ISD::FNEG) {
+ FlipBit = SignBit;
+ AddToWorklist(FlipBit.getNode());
+ } else {
+ assert(N0.getOpcode() == ISD::FABS);
+ SDValue Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(NewConv)));
+ AddToWorklist(Hi.getNode());
+ FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+ AddToWorklist(FlipBit.getNode());
+ }
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+ }
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fcopysign cst, x)) ->
+ // flipbit = (and (extract_element
+ // (xor (bitcast cst), (bitcast x)), 0),
+ // signbit)
+ // (xor (bitcast cst) (build_pair flipbit, flipbit))
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
+ AddToWorklist(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
+ AddToWorklist(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
+ AddToWorklist(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
+ AddToWorklist(X.getNode());
+ }
+
+ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
+ AddToWorklist(Cst.getNode());
+ SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
+ AddToWorklist(X.getNode());
+ SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+ AddToWorklist(XorResult.getNode());
+ SDValue XorResult64 = DAG.getNode(
+ ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+ DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
+ SDLoc(XorResult)));
+ AddToWorklist(XorResult64.getNode());
+ SDValue FlipBit =
+ DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+ DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+ AddToWorklist(FlipBit.getNode());
+ SDValue FlipBits =
+ DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+ AddToWorklist(FlipBits.getNode());
+ return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+ }
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, SDLoc(X), VT,
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
+ AddToWorklist(X.getNode());
+
+ SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
+ AddToWorklist(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
+ return CombineLD;
+
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+ // If operands are a bitcast, peek through if it casts the original VT.
+ // If operands are a constant, just bitcast back to original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0).getValueType() == VT)
+ return SDValue(Op.getOperand(0));
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return DAG.getBitcast(VT, Op);
+ return SDValue();
+ };
+
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
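+/// For example, bitcasting a constant v2i64 build_vector to v4i32 splits each
+/// 64-bit element into two 32-bit constants, ordered according to the
+/// target's endianness.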
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
+ DAG.getBitcast(DstEltVT, BV->getOperand(0)));
+
+ SmallVector<SDValue, 8> Ops;
+ for (SDValue Op : BV->op_values()) {
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
+ Ops.push_back(DAG.getBitcast(DstEltVT, Op));
+ AddToWorklist(Ops.back().getNode());
+ }
+ return DAG.getBuildVector(VT, SDLoc(BV), Ops);
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+ // Convert the input float vector to an integer vector whose elements are
+ // the same size.
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is an FP type,
+ // convert to integer first, then to FP of the right size.
+ if (DstEltVT.isFloatingPoint()) {
+ EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+ // Next, convert to FP elements of the same size.
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+ }
+
+ SDLoc DL(BV);
+
+ // Okay, we know the src/dst element types are both integers of differing
+ // widths. Handle the growing case first.
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+ if (SrcBitSize < DstBitSize) {
+ unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+ i += NumInputsPerOutput) {
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ APInt NewBits = APInt(DstBitSize, 0);
+ bool EltIsUndef = true;
+ for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+ // Shift the previously computed bits over.
+ NewBits <<= SrcBitSize;
+ SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+ if (Op.isUndef()) continue;
+ EltIsUndef = false;
+
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+ zextOrTrunc(SrcBitSize).zext(DstBitSize);
+ }
+
+ if (EltIsUndef)
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
+ }
+
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // Finally, this must be the case where we are shrinking elements: each input
+ // turns into multiple outputs.
+ unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ NumOutputsPerInput*BV->getNumOperands());
+ SmallVector<SDValue, 8> Ops;
+
+ for (const SDValue &Op : BV->op_values()) {
+ if (Op.isUndef()) {
+ Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
+ continue;
+ }
+
+ APInt OpVal = cast<ConstantSDNode>(Op)->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
+
+ for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+ APInt ThisVal = OpVal.trunc(DstBitSize);
+ Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
+ OpVal = OpVal.lshr(DstBitSize);
+ }
+
+ // For big endian targets, swap the order of the pieces of each element.
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+ }
+
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
+/// Try to perform FMA combining on a given FADD node.
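+/// For example, (fadd (fmul x, y), z) -> (fma x, y, z) when fused multiply-add
+/// is legal and profitable for the target.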
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && N0.getOpcode() == ISD::FMUL &&
+ N1.getOpcode() == ISD::FMUL) {
+ if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ std::swap(N0, N1);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1), N0);
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+ if (AllowFusion && LookThroughFPExt) {
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if ((AllowFusion || HasFMAD) && Aggressive) {
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ N1));
+ }
+
+ // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
+ if (N1->getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(0), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N1.getOperand(2).getOperand(0),
+ N1.getOperand(2).getOperand(1),
+ N0));
+ }
+
+ if (AllowFusion && LookThroughFPExt) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
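+
+// The FMAD/FMA distinction above is observable numerically. A minimal scalar
+// sketch in plain C++ (std::fma from <cmath>; assumes contraction is otherwise
+// disabled):
+//   float x = 1.0f + 0x1p-23f, y = 1.0f - 0x1p-23f;
+//   float unfused = x * y - 1.0f;          // 0.0f: the product rounds to 1.0f
+//   float fused   = std::fma(x, y, -1.0f); // -0x1p-46f: single final rounding
+// ISD::FMAD behaves like the first form (intermediate rounding) and ISD::FMA
+// like the second, which is why the combine only fires when the target reports
+// one of them as legal and profitable.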
+
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
+ if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+ if (N0.getOpcode() == ISD::FMUL &&
+ (Aggressive || N0->hasOneUse())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FMUL &&
+ (Aggressive || N1->hasOneUse()))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1), N0);
+
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (N0.getOpcode() == ISD::FNEG &&
+ N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
+ SDValue N00 = N0.getOperand(0).getOperand(0);
+ SDValue N01 = N0.getOperand(0).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // Look through FP_EXTEND nodes to do more combining.
+ if (AllowFusion && LookThroughFPExt) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+ }
+
+ // More folding opportunities when target permits.
+ if ((AllowFusion || HasFMAD) && Aggressive) {
+ // fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y, (fma u, v, (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0.getOperand(2).getOpcode() == ISD::FMUL) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+
+ // fold (fsub x, (fma y, z, (fmul u, v)))
+ // -> (fma (fneg y), z, (fma (fneg u), v, x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ SDValue N20 = N1.getOperand(2).getOperand(0);
+ SDValue N21 = N1.getOperand(2).getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
+ N21, N0));
+ }
+
+ if (AllowFusion && LookThroughFPExt) {
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ SDValue N102 = N1.getOperand(0).getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// Try to perform FMA combining on a given FMUL node.
+SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool AllowFusion =
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA =
+ AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+
+ // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
+ // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFADD(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFADD(N1, N0))
+ return FMA;
+
+ // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
+ // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
+ // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
+ // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
+ if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
+ auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
+ if (XC0 && XC0->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ Y);
+ if (XC0 && XC0->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+
+ auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
+ if (XC1 && XC1->isExactlyValue(+1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
+ if (XC1 && XC1->isExactlyValue(-1.0))
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
+ }
+ return SDValue();
+ };
+
+ if (SDValue FMA = FuseFSUB(N0, N1))
+ return FMA;
+ if (SDValue FMA = FuseFSUB(N1, N0))
+ return FMA;
+
+ return SDValue();
+}
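+
+// The FuseFADD/FuseFSUB folds above are simple algebra over the fused form,
+// e.g. (x + 1.0) * y == x*y + y == fma(x, y, y) and
+//      (1.0 - x) * y == y - x*y == fma(-x, y, y).
+// In floating point the two sides can round differently, which is why these
+// rewrites live behind the HasFMAD/HasFMA gating above rather than in the
+// generic FMUL folds.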
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (fadd c1, c2) -> c1 + c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
+
+ // canonicalize constant to RHS
+ if (N0CFP && !N1CFP)
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+
+ // fold (fadd A, (fneg B)) -> (fsub A, B)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
+ return DAG.getNode(ISD::FSUB, DL, VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+
+ // fold (fadd (fneg A), B) -> (fsub B, A)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
+ return DAG.getNode(ISD::FSUB, DL, VT, N1,
+ GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // No FP constant should be created after legalization as the Instruction
+ // Selection pass has a hard time dealing with FP constants.
+ bool AllowNewConst = (Level < AfterLegalizeDAG);
+
+ // fold (fadd A, 0) -> A
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
+
+ // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+ if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
+ Flags),
+ Flags);
+
+ // If allowed, fold (fadd (fneg x), x) -> 0.0
+ if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+ return DAG.getConstantFP(0.0, DL, VT);
+
+ // If allowed, fold (fadd x, (fneg x)) -> 0.0
+ if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+ return DAG.getConstantFP(0.0, DL, VT);
+
+ // We can fold chains of FADD's of the same value into multiplications.
+ // This transform is not safe in general because we are reducing the number
+ // of rounding steps.
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
+ if (N0.getOpcode() == ISD::FMUL) {
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+
+ // (fadd (fmul x, c), x) -> (fmul x, c+1)
+ if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
+ }
+
+ // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
+ if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FMUL) {
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+
+ // (fadd x, (fmul x, c)) -> (fmul x, c+1)
+ if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
+ }
+
+ // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
+ if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N0.getOperand(0)) {
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
+ DAG.getConstantFP(2.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
+ bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ // (fadd (fadd x, x), x) -> (fmul x, 3.0)
+ if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N1, DAG.getConstantFP(3.0, DL, VT), Flags);
+ }
+ }
+
+ if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
+ bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
+ if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0, DAG.getConstantFP(3.0, DL, VT), Flags);
+ }
+ }
+
+ // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
+ if (AllowNewConst &&
+ N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
+ N0.getOperand(0) == N0.getOperand(1) &&
+ N1.getOperand(0) == N1.getOperand(1) &&
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
+ DAG.getConstantFP(4.0, DL, VT), Flags);
+ }
+ }
+ } // enable-unsafe-fp-math
+
+ // FADD -> FMA combines:
+ if (SDValue Fused = visitFADDForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (fsub c1, c2) -> c1-c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
+
+ // fold (fsub A, (fneg B)) -> (fadd A, B)
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ return DAG.getNode(ISD::FADD, dl, VT, N0,
+ GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero())
+ return N0;
+
+ // (fsub 0, B) -> -B
+ if (N0CFP && N0CFP->isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ }
+
+ // (fsub x, x) -> 0.0
+ if (N0 == N1)
+ return DAG.getConstantFP(0.0f, dl, VT);
+
+ // (fsub x, (fadd x, y)) -> (fneg y)
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+
+ if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
+ // FSUB -> FMA combines:
+ if (SDValue Fused = visitFSUBForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+
+ // fold vector ops
+ if (VT.isVector()) {
+ // This just handles C1 * C2 for vectors. Other vector folds are below.
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+ }
+
+ // fold (fmul c1, c2) -> c1*c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
+
+ // canonicalize constant to RHS
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
+
+ // fold (fmul A, 1.0) -> A
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return N0;
+
+ if (Options.UnsafeFPMath) {
+ // fold (fmul A, 0) -> 0
+ if (N1CFP && N1CFP->isZero())
+ return N1;
+
+ // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+ if (N0.getOpcode() == ISD::FMUL) {
+ // Fold scalars or any vector constants (not just splats).
+ // This fold is done in general by InstCombine, but extra fmul insts
+ // may have been generated during lowering.
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
+ auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
+
+ // Check 1: Make sure that the first operand of the inner multiply is NOT
+ // a constant. Otherwise, we may induce infinite looping.
+ if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+ // Check 2: Make sure that the second operand of the inner multiply and
+ // the second operand of the outer multiply are constants.
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
+ }
+ }
+ }
+
+ // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
+ // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
+ // during an early run of DAGCombiner it can prevent folding with fmuls
+ // inserted during lowering.
+ if (N0.getOpcode() == ISD::FADD &&
+ (N0.getOperand(0) == N0.getOperand(1)) &&
+ N0.hasOneUse()) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
+ }
+ }
+
+ // fold (fmul X, 2.0) -> (fadd X, X)
+ if (N1CFP && N1CFP->isExactlyValue(+2.0))
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
+
+ // fold (fmul X, -1.0) -> (fneg X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0))
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N0);
+
+ // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
+ }
+ }
+
+ // FMUL -> FMA combines:
+ if (SDValue Fused = visitFMULForFMACombine(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMA(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+
+ // Constant fold FMA.
+ if (isa<ConstantFPSDNode>(N0) &&
+ isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2)) {
+ return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+ }
+
+ if (Options.UnsafeFPMath) {
+ if (N0CFP && N0CFP->isZero())
+ return N2;
+ if (N1CFP && N1CFP->isZero())
+ return N2;
+ }
+ // TODO: The FMA node should have flags that propagate to these nodes.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
+ if (N1CFP && N1CFP->isExactlyValue(1.0))
+ return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
+
+ // Canonicalize (fma c, x, y) -> (fma x, c, y)
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+
+ // TODO: FMA nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
+ if (Options.UnsafeFPMath) {
+ // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+ if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ &Flags), &Flags);
+ }
+
+ // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+ if (N0.getOpcode() == ISD::FMUL &&
+ isConstantFPBuildVectorOrConstantFP(N1) &&
+ isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, dl, VT,
+ N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ &Flags),
+ N2);
+ }
+ }
+
+ // (fma x, 1, y) -> (fadd x, y)
+ // (fma x, -1, y) -> (fadd (fneg x), y)
+ if (N1CFP) {
+ if (N1CFP->isExactlyValue(1.0))
+ // TODO: The FMA node should have flags that propagate to this node.
+ return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+
+ if (N1CFP->isExactlyValue(-1.0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ AddToWorklist(RHSNeg.getNode());
+ // TODO: The FMA node should have flags that propagate to this node.
+ return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ }
+ }
+
+ if (Options.UnsafeFPMath) {
+ // (fma x, c, x) -> (fmul x, (c+1))
+ if (N1CFP && N0 == N2) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(1.0, dl, VT),
+ &Flags), &Flags);
+ }
+
+ // (fma x, c, (fneg x)) -> (fmul x, (c-1))
+ if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, dl, VT, N0,
+ DAG.getNode(ISD::FADD, dl, VT,
+ N1, DAG.getConstantFP(-1.0, dl, VT),
+ &Flags), &Flags);
+ }
+ }
+
+ return SDValue();
+}
+
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is that different
+// targets may have different costs for FDIV and FMUL, so sometimes the cost
+// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+// reason is that the critical path is increased from "one FDIV" to
+// "one FDIV + one FMUL".
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
+ const SDNodeFlags *Flags = N->getFlags();
+ if (!UnsafeMath && !Flags->hasAllowReciprocal())
+ return SDValue();
+
+ // Skip if current node is a reciprocal.
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ // Exit early if the target does not want this transform or if there can't
+ // possibly be enough uses of the divisor to make the transform worthwhile.
+ SDValue N1 = N->getOperand(1);
+ unsigned MinUses = TLI.combineRepeatedFPDivisors();
+ if (!MinUses || N1->use_size() < MinUses)
+ return SDValue();
+
+ // Find all FDIV users of the same divisor.
+ // Use a set because duplicates may be present in the user list.
+ SetVector<SDNode *> Users;
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // This division is eligible for optimization only if global unsafe math
+ // is enabled or if this division allows reciprocal formation.
+ if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+ Users.insert(U);
+ }
+ }
+
+ // Now that we have the actual number of divisor uses, make sure it meets
+ // the minimum threshold specified by the target.
+ if (Users.size() < MinUses)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal, Flags);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
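+
+// A scalar sketch of the transform in plain C++ (a, b and D are hypothetical
+// values; profitability depends on the target's FDIV/FMUL costs):
+//   double recip = 1.0 / D;   // the single remaining division
+//   double r0 = a * recip;    // replaces a / D
+//   double r1 = b * recip;    // replaces b / D
+// With fewer than the target-specified MinUses divisions sharing D, the extra
+// FDIV computing 'recip' would not pay for itself, hence the check above.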
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
+
+ // fold vector ops
+ if (VT.isVector())
+ if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ return FoldedVOp;
+
+ // fold (fdiv c1, c2) -> c1/c2
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+
+ if (Options.UnsafeFPMath) {
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP) {
+ // Compute the reciprocal 1.0 / c2.
+ const APFloat &N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+ // isn't too nasty (e.g. NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT), Flags);
+ }
+
+ // If this FDIV is part of a reciprocal square root, it may be folded
+ // into a target-specific square root estimate instruction.
+ if (N1.getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ } else if (N1.getOpcode() == ISD::FP_ROUND &&
+ N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
+ Flags)) {
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ } else if (N1.getOpcode() == ISD::FMUL) {
+ // Look through an FMUL. Even though this won't remove the FDIV directly,
+ // it's still worthwhile to get rid of the FSQRT if possible.
+ SDValue SqrtOp;
+ SDValue OtherOp;
+ if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(0);
+ OtherOp = N1.getOperand(1);
+ } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+ SqrtOp = N1.getOperand(1);
+ OtherOp = N1.getOperand(0);
+ }
+ if (SqrtOp.getNode()) {
+ // We found a FSQRT, so try to make this fold:
+ // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
+ if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
+ RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ }
+ }
+
+ // Fold into a reciprocal estimate and multiply instead of a real divide.
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
+ AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ }
+ }
+
+ // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ // Both can be negated for free, check to see if at least one is cheaper
+ // negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
+ GetNegatedExpression(N0, DAG, LegalOperations),
+ GetNegatedExpression(N1, DAG, LegalOperations),
+ Flags);
+ }
+ }
+
+ if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+ return CombineRepeatedDivisors;
+
+ return SDValue();
+}
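+
+// As a concrete instance of the reciprocal fold above (illustrative only):
+// (fdiv X, 4.0) becomes (fmul X, 0.25), since 1.0/4.0 divides exactly
+// (APFloat::opOK) and 0.25 is a representable, typically legal FP immediate.
+// Dividing by 3.0 yields an inexact reciprocal (opInexact), which is still
+// accepted under UnsafeFPMath, trading a little accuracy for the cheaper
+// multiply.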
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ // fold (frem c1, c2) -> fmod(c1,c2)
+ if (N0CFP && N1CFP)
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
+ &cast<BinaryWithFlagsSDNode>(N)->Flags);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+ if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+ return SDValue();
+
+ // TODO: FSQRT nodes should have flags that propagate to the created nodes.
+ // For now, create a Flags object for use with all unsafe math transforms.
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+ return buildSqrtEstimate(N->getOperand(0), &Flags);
+}
+
+/// copysign(x, fp_extend(y)) -> copysign(x, y)
+/// copysign(x, fp_round(y)) -> copysign(x, y)
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if ((N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND)) {
+ // Do not optimize out the type conversion of f128 values yet.
+ // For some targets like x86_64, the configuration has been changed to keep
+ // one f128 value in one SSE register, but instruction selection cannot
+ // handle FCOPYSIGN on SSE registers yet.
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ }
+ return false;
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ EVT VT = N->getValueType(0);
+
+ if (N0CFP && N1CFP) // Constant fold
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
+
+ if (N1CFP) {
+ const APFloat& V = N1CFP->getValueAPF();
+ // copysign(x, c1) -> fabs(x) iff ispos(c1)
+ // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+ if (!V.isNegative()) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+ } else {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
+ }
+ }
+
+ // copysign(fabs(x), y) -> copysign(x, y)
+ // copysign(fneg(x), y) -> copysign(x, y)
+ // copysign(copysign(x,z), y) -> copysign(x, y)
+ if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+ N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ N0.getOperand(0), N1);
+
+ // copysign(x, abs(y)) -> abs(x)
+ if (N1.getOpcode() == ISD::FABS)
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
+ // copysign(x, copysign(y,z)) -> copysign(x, z)
+ if (N1.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ N0, N1.getOperand(1));
+
+ // copysign(x, fp_extend(y)) -> copysign(x, y)
+ // copysign(x, fp_round(y)) -> copysign(x, y)
+ if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ N0, N1.getOperand(0));
+
+ return SDValue();
+}
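+
+// The copysign folds above mirror identities that also hold for the libm
+// function, e.g. in plain C++:
+//   std::copysign(x, std::fabs(y)) == std::fabs(x)               // sign source is non-negative
+//   std::copysign(std::copysign(x, z), y) == std::copysign(x, y) // outermost sign source wins
+// so rewriting the DAG this way preserves the result bit for bit.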
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (sint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+ // but UINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
+ // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
+ !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+
+ // fold (sint_to_fp (zext (setcc x, y, cc))) ->
+ // (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(0).getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
+ // fold (uint_to_fp c1) -> c1fp
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ // ...but only if the target supports immediate floating-point values
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
+
+ // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+ // but SINT_TO_FP is legal on this target, try to convert.
+ if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+ // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
+ }
+
+ // The next optimizations are desirable only if SELECT_CC can be lowered.
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
+ // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
+ if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
+ (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
+ SDValue Ops[] =
+ { N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
+ N0.getOperand(2) };
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
+ }
+ }
+
+ return SDValue();
+}
+
+// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
+static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
+ return SDValue();
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+ bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned ActualSize = std::min(InputSize, OutputSize);
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+ // We can only fold away the float conversion if the input range can be
+ // represented exactly in the float range.
+ if (APFloat::semanticsPrecision(sem) >= ActualSize) {
+ if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
+ unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
+ }
+ if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
+ return DAG.getBitcast(VT, Src);
+ }
+ return SDValue();
+}
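+
+// For instance (IEEE single precision has a 24-bit significand): an
+// i16 -> f32 -> i32 round trip is folded to a plain integer extend because
+// min(15, 31) <= 24, so every input value survives the conversion exactly,
+// whereas i32 -> f32 -> i32 is left alone because 31 > 24 and large inputs
+// would be altered by the intermediate float.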
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_sint c1fp) -> c1
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_to_uint c1fp) -> c1
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
+
+ return FoldIntToFPToInt(N, DAG);
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fp_round c1fp) -> c1fp
+ if (N0CFP)
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
+
+ // fold (fp_round (fp_extend x)) -> x
+ if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+ return N0.getOperand(0);
+
+ // fold (fp_round (fp_round x)) -> (fp_round x)
+ if (N0.getOpcode() == ISD::FP_ROUND) {
+ const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+
+ // Skip this folding if it results in an fp_round from f80 to f16.
+ //
+ // f80 to f16 always generates an expensive (and as yet, unimplemented)
+ // libcall to __truncxfhf2 instead of selecting native f16 conversion
+ // instructions from f32 or f64. Moreover, the first (value-preserving)
+ // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
+ // x86.
+ if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
+ return SDValue();
+
+ // If the first fp_round isn't a value preserving truncation, it might
+ // introduce a tie in the second fp_round that wouldn't occur in the
+ // single-step fp_round we want to fold to.
+ // In other words, double rounding isn't the same as rounding.
+ // Also, this is a value preserving truncation iff both fp_round's are.
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
+ }
+
+ // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
+ N0.getOperand(0), N1);
+ AddToWorklist(Tmp.getNode());
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
+ Tmp, N0.getOperand(1));
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // fold (fp_round_inreg c1fp) -> c1fp
+ if (N0CFP && isTypeLegal(EVT)) {
+ SDLoc DL(N);
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
+ if (N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fp_extend c1fp) -> c1fp
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
+
+ // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
+ // value of X.
+ if (N0.getOpcode() == ISD::FP_ROUND
+ && N0.getNode()->getConstantOperandVal(1) == 1) {
+ SDValue In = N0.getOperand(0);
+ if (In.getValueType() == VT) return In;
+ if (VT.bitsLT(In.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
+ In, N0.getOperand(1));
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
+ }
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), N0.getValueType(),
+ LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fceil c1) -> fceil(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ftrunc c1) -> ftrunc(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ffloor c1) -> ffloor(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+
+ return SDValue();
+}
+
+// FIXME: FNEG and FABS have a lot in common; refactor.
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Constant fold FNEG.
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N0, DAG, LegalOperations);
+
+ // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFNegFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x80... per scalar element
+ // and splat it.
+ SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x80...
+ SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+ }
+ SDLoc DL0(N0);
+ Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
+ DAG.getConstant(SignMask, DL0, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(VT, Int);
+ }
+ }
+
+ // (fneg (fmul c, x)) -> (fmul -c, x)
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
+ ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+ if (CFP1) {
+ APFloat CVal = CFP1->getValueAPF();
+ CVal.changeSign();
+ if (Level >= AfterLegalizeDAG &&
+ (TLI.isFPImmLegal(CVal, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ N0.getOperand(1)),
+ &cast<BinaryWithFlagsSDNode>(N0)->Flags);
+ }
+ }
+
+ return SDValue();
+}
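+
+// The sign-mask trick above has a direct scalar analogue in plain C++
+// (hypothetical value; needs <cstring> and <cstdint>):
+//   float f = 1.5f;
+//   std::uint32_t bits;
+//   std::memcpy(&bits, &f, sizeof bits);
+//   bits ^= 0x80000000u;                // flip the IEEE-754 sign bit
+//   std::memcpy(&f, &bits, sizeof f);   // f is now -1.5f
+// visitFABS below performs the matching AND with the complemented mask to
+// clear the sign bit instead of flipping it.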
+
+SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
+ }
+
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+ const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+
+ if (N0CFP && N1CFP) {
+ const APFloat &C0 = N0CFP->getValueAPF();
+ const APFloat &C1 = N1CFP->getValueAPF();
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
+ }
+
+ // Canonicalize to constant on RHS.
+ if (isConstantFPBuildVectorOrConstantFP(N0) &&
+ !isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (fabs c1) -> fabs(c1)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+
+ // fold (fabs (fabs x)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FABS)
+ return N->getOperand(0);
+
+ // fold (fabs (fneg x)) -> (fabs x)
+ // fold (fabs (fcopysign x, y)) -> (fabs x)
+ if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+ return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
+
+ // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
+ // constant pool values.
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST &&
+ N0.getNode()->hasOneUse()) {
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+ if (IntVT.isInteger() && !IntVT.isVector()) {
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For a vector, get a mask such as 0x7f... per scalar element
+ // and splat it.
+ SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For a scalar, just generate 0x7f...
+ SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(N->getValueType(0), Int);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However, that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+ // on the target.
+ if (N1.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ N1.getOperand(0).getValueType())) {
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ Chain, N1.getOperand(2),
+ N1.getOperand(0), N1.getOperand(1), N2);
+ }
+
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = nullptr;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+ // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
+
+ // Match this pattern so that we can generate simpler code:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // This applies only when the AND constant value has one bit set and the
+ // SRL constant is equal to the log2 of the AND constant. The back-end is
+ // smart enough to convert the result into a TEST/JMP sequence.
+ SDValue Op0 = N1.getOperand(0);
+ SDValue Op1 = N1.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::AND &&
+ Op1.getOpcode() == ISD::Constant) {
+ SDValue AndOp1 = Op0.getOperand(1);
+
+ if (AndOp1.getOpcode() == ISD::Constant) {
+ const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+ if (AndConst.isPowerOf2() &&
+ cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDLoc DL(N);
+ SDValue SetCC =
+ DAG.getSetCC(DL,
+ getSetCCResultType(Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
+ ISD::SETNE);
+
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
+ MVT::Other, Chain, SetCC, N2);
+ // Don't add the new BRCond into the worklist or else SimplifySelectCC
+ // will convert it back to (X & C1) >> C2.
+ CombineTo(N, NewBRCond, false);
+ // Truncate is dead.
+ if (Trunc)
+ deleteAndRecombine(Trunc);
+ // Replace the uses of SRL with SETCC
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ deleteAndRecombine(N1.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
+ }
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ if (SDValue Tmp = visitXOR(TheXor)) {
+ if (Tmp.getNode() != TheXor) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
+ deleteAndRecombine(TheXor);
+ return DAG.getNode(ISD::BRCOND, SDLoc(N),
+ MVT::Other, Chain, Tmp, N2);
+ }
+
+ // visitXOR has changed XOR's operands or replaced the XOR completely,
+ // bail out.
+ return SDValue(N, 0);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (isOneConstant(Op0) && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ deleteAndRecombine(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, SDLoc(N),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
+
+ return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+ CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+ SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However, that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
+ // Use SimplifySetCC to simplify SETCC's.
+ SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
+ CondLHS, CondRHS, CC->get(), SDLoc(N),
+ false);
+ if (Simp.getNode()) AddToWorklist(Simp.getNode());
+
+ // fold to a simpler setcc
+ if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
+ N->getOperand(0), Simp.getOperand(2),
+ Simp.getOperand(0), Simp.getOperand(1),
+ N->getOperand(4));
+
+ return SDValue();
+}
+
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ unsigned AS;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
+}
+
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
+ // out. There is no reason to make this a preinc/predec.
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+ Ptr.getNode()->hasOneUse())
+ return false;
+
+ // Ask the target to do addressing mode selection.
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+ return false;
+
+ // Backends without true r+i pre-indexed forms may need to pass a
+ // constant base with a variable offset so that constant coercion
+ // will work with the patterns in canonical form.
+ bool Swapped = false;
+ if (isa<ConstantSDNode>(BasePtr)) {
+ std::swap(BasePtr, Offset);
+ Swapped = true;
+ }
+
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ return false;
+
+ // Try turning it into a pre-indexed load / store except when:
+ // 1) The new base ptr is a frame index.
+ // 2) If N is a store and the new base ptr is either the same as or is a
+ // predecessor of the value being stored.
+ // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+ // that would create a cycle.
+ // 4) All uses are load / store ops that use it as old base ptr.
+
+ // Check #1. Preinc'ing a frame index would require copying the stack pointer
+ // (plus the implicit offset) to a register to preinc anyway.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ return false;
+
+ // Check #2.
+ if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+ return false;
+ }
+
+ // Caches for hasPredecessorHelper.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+
+ // If the offset is a constant, there may be other adds of constants that
+ // can be folded with this one. We should do this to avoid having to keep
+ // a copy of the original base pointer.
+ SmallVector<SDNode *, 16> OtherUses;
+ if (isa<ConstantSDNode>(Offset))
+ for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
+ UE = BasePtr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
+ continue;
+
+ if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
+ continue;
+
+ if (Use.getUser()->getOpcode() != ISD::ADD &&
+ Use.getUser()->getOpcode() != ISD::SUB) {
+ OtherUses.clear();
+ break;
+ }
+
+ SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
+ if (!isa<ConstantSDNode>(Op1)) {
+ OtherUses.clear();
+ break;
+ }
+
+ // FIXME: In some cases, we can be smarter about this.
+ if (Op1.getValueType() != Offset.getValueType()) {
+ OtherUses.clear();
+ break;
+ }
+
+ OtherUses.push_back(Use.getUser());
+ }
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Now check for #3 and #4.
+ bool RealUse = false;
+
+ for (SDNode *Use : Ptr.getNode()->uses()) {
+ if (Use == N)
+ continue;
+ if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
+ return false;
+
+ // If Ptr may be folded in the addressing mode of another use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse)
+ return false;
+
+ SDValue Result;
+ if (isLoad)
+ Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM);
+ ++PreIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.4 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ if (Swapped)
+ std::swap(BasePtr, Offset);
+
+ // Replace other uses of BasePtr that can be updated to use Ptr
+ for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
+ unsigned OffsetIdx = 1;
+ if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
+ OffsetIdx = 0;
+ assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
+ BasePtr.getNode() && "Expected BasePtr operand");
+
+ // We need to replace ptr0 in the following expression:
+ // x0 * offset0 + y0 * ptr0 = t0
+ // knowing that
+ // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
+ //
+ // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
+ // indexed load/store and the expression that needs to be rewritten.
+ //
+ // Therefore, we have:
+ // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
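+ //
+ // For instance (illustrative values only): if the indexed access computes
+ // t1 = ptr0 + 4 (x1 = y1 = 1) and another use computes t0 = ptr0 + 8
+ // (x0 = y0 = 1), the formula yields t0 = (8 - 4) + t1 = t1 + 4, so that
+ // use no longer needs the old base pointer.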
+
+ ConstantSDNode *CN =
+ cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
+ int X0, X1, Y0, Y1;
+ const APInt &Offset0 = CN->getAPIntValue();
+ APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+
+ X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+
+ unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
+
+ APInt CNV = Offset0;
+ if (X0 < 0) CNV = -CNV;
+ if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
+ else CNV = CNV - Offset1;
+
+ SDLoc DL(OtherUses[i]);
+
+ // We can now generate the new expression.
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
+ SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+
+ SDValue NewUse = DAG.getNode(Opcode,
+ DL,
+ OtherUses[i]->getValueType(0), NewOp1, NewOp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
+ deleteAndRecombine(OtherUses[i]);
+ }
+
+ // Replace the uses of Ptr with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ deleteAndRecombine(Ptr.getNode());
+
+ return true;
+}
+
+/// Try to combine a load/store with an add/sub of the base pointer node into a
+/// post-indexed load/store. After the transformation, the add/subtract is
+/// effectively folded into the new indexed load/store and all of its other
+/// uses are redirected to the new load/store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+ !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ isLoad = false;
+ } else {
+ return false;
+ }
+
+ if (Ptr.getNode()->hasOneUse())
+ return false;
+
+ for (SDNode *Op : Ptr.getNode()->uses()) {
+ if (Op == N ||
+ (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+ continue;
+
+ SDValue BasePtr;
+ SDValue Offset;
+ ISD::MemIndexedMode AM = ISD::UNINDEXED;
+ if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+ // Don't create an indexed load / store with zero offset.
+ if (isNullConstant(Offset))
+ continue;
+
+ // Try turning it into a post-indexed load / store except when
+ // 1) All uses are load / store ops that use it as base ptr (and
+ // it may be folded into the addressing mode).
+ // 2) Op must be independent of N, i.e. Op is neither a predecessor
+ // nor a successor of N. Otherwise, if Op is folded that would
+ // create a cycle.
+
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+ continue;
+
+ // Check for #1.
+ bool TryNext = false;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
+ if (Use == Ptr.getNode())
+ continue;
+
+ // If all the uses are load / store addresses, then don't do the
+ // transformation.
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ bool RealUse = false;
+ for (SDNode *UseUse : Use->uses()) {
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
+ RealUse = true;
+ }
+
+ if (!RealUse) {
+ TryNext = true;
+ break;
+ }
+ }
+ }
+
+ if (TryNext)
+ continue;
+
+ // Check for #2
+ if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+ SDValue Result = isLoad
+ ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
+ BasePtr, Offset, AM);
+ ++PostIndexedNodes;
+ ++NodesCombined;
+ DEBUG(dbgs() << "\nReplacing.5 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Result.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorklistRemover DeadNodes(*this);
+ if (isLoad) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
+ } else {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
+ }
+
+ // Finally, since the node is now dead, remove it from the graph.
+ deleteAndRecombine(N);
+
+ // Replace the uses of Use with uses of the updated base value.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+ Result.getValue(isLoad ? 1 : 0));
+ deleteAndRecombine(Op);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/// \brief Return the base-pointer arithmetic from an indexed \p LD.
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ assert(AM != ISD::UNINDEXED);
+ SDValue BP = LD->getOperand(1);
+ SDValue Inc = LD->getOperand(2);
+
+ // Some backends use TargetConstants for load offsets, but don't expect
+ // TargetConstants in general ADD nodes. We can convert these constants into
+ // regular Constants (if the constant is not opaque).
+ assert((Inc.getOpcode() != ISD::TargetConstant ||
+ !cast<ConstantSDNode>(Inc)->isOpaque()) &&
+ "Cannot split out indexing using opaque target constants");
+ if (Inc.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
+ ConstInc->getValueType(0));
+ }
+
+ unsigned Opc =
+ (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+ return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // If load is not volatile and there are no uses of the loaded value (and
+ // the updated indexed value in case of indexed loads), change uses of the
+ // chain value into uses of the chain input (i.e. delete the dead load).
+ if (!LD->isVolatile()) {
+ if (N->getValueType(1) == MVT::Other) {
+ // Unindexed loads.
+ if (!N->hasAnyUseOfValue(0)) {
+ // It's not safe to use the two value CombineTo variant here. e.g.
+ // v1, chain2 = load chain1, loc
+ // v2, chain3 = load chain2, loc
+ // v3 = add v2, c
+ // Now we replace use of chain2 with chain1. This makes the second load
+ // isomorphic to the one we are deleting, and thus makes this load live.
+ DEBUG(dbgs() << "\nReplacing.6 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith chain: ";
+ Chain.getNode()->dump(&DAG);
+ dbgs() << "\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+
+ if (N->use_empty())
+ deleteAndRecombine(N);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ } else {
+ // Indexed loads.
+ assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+
+ // If this load has an opaque TargetConstant offset, then we cannot split
+ // the indexing into an add/sub directly (that TargetConstant may not be
+ // valid for a different type of node, and we cannot convert an opaque
+ // target constant into a regular constant).
+ bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
+ cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+
+ if (!N->hasAnyUseOfValue(0) &&
+ ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
+ SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorklist(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
+ DEBUG(dbgs() << "\nReplacing.7 ";
+ N->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Undef.getNode()->dump(&DAG);
+ dbgs() << " and 2 other values\n");
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
+ deleteAndRecombine(N);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
+ // If this load is directly stored, replace the load value with the stored
+ // value.
+ // TODO: Handle store large -> read small portion.
+ // TODO: Handle TRUNCSTORE/LOADEXT
+ if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+ StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+ if (PrevST->getBasePtr() == Ptr &&
+ PrevST->getValue().getValueType() == N->getValueType(0))
+ return CombineTo(N, Chain.getOperand(1), Chain);
+ }
+ }
+
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getMemOperand()->getBaseAlignment()) {
+ SDValue NewLoad = DAG.getExtLoad(
+ LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), Align,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ if (NewLoad.getNode() != N)
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ }
+ }
+ }
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA && LD->isUnindexed()) {
+ // Walk up chain skipping non-aliasing memory nodes.
+ SDValue BetterChain = FindBetterChain(N, Chain);
+
+ // If there is a better chain.
+ if (Chain != BetterChain) {
+ SDValue ReplLoad;
+
+ // Replace the chain to avoid a dependency on the old chain.
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+ ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
+ BetterChain, Ptr, LD->getMemOperand());
+ } else {
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getMemoryVT(),
+ LD->getMemOperand());
+ }
+
+ // Create token factor to keep old chain connected.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
+ MVT::Other, Chain, ReplLoad.getValue(1));
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Replace uses with load result and token factor. Don't add users
+ // to work list.
+ return CombineTo(N, ReplLoad.getValue(0), Token, false);
+ }
+ }
+
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // Try to slice up N to more direct loads if the slices are mapped to
+ // different register banks or pairing can take place.
+ if (SliceUpLoad(N))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+namespace {
+/// \brief Helper structure used to slice a load in smaller loads.
+/// Basically a slice is obtained from the following sequence:
+/// Origin = load Ty1, Base
+/// Shift = srl Ty1 Origin, CstTy Amount
+/// Inst = trunc Shift to Ty2
+///
+/// Then, it will be rewritten into:
+/// Slice = load SliceTy, Base + SliceOffset
+/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
+///
+/// SliceTy is deduced from the number of bits that are actually used to
+/// build Inst.
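+///
+/// For instance (an illustrative case, not the only one handled): an i64
+/// Origin used as (trunc Origin to i32) and (trunc (srl Origin, 32) to i32)
+/// yields two i32 slices, loaded at Base and Base + 4 on a little-endian
+/// target (the offsets swap on big-endian).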
+struct LoadedSlice {
+ /// \brief Helper structure used to compute the cost of a slice.
+ struct Cost {
+ /// Are we optimizing for code size.
+ bool ForCodeSize;
+ /// Various cost.
+ unsigned Loads;
+ unsigned Truncates;
+ unsigned CrossRegisterBanksCopies;
+ unsigned ZExts;
+ unsigned Shift;
+
+ Cost(bool ForCodeSize = false)
+ : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
+ CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
+
+ /// \brief Get the cost of one isolated slice.
+ Cost(const LoadedSlice &LS, bool ForCodeSize = false)
+ : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
+ CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
+ EVT TruncType = LS.Inst->getValueType(0);
+ EVT LoadedType = LS.getLoadedType();
+ if (TruncType != LoadedType &&
+ !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
+ ZExts = 1;
+ }
+
+ /// \brief Account for slicing gain in the current cost.
+ /// Slicing provides a few gains, like removing a shift or a
+ /// truncate. This method allows the cost of the original load to be
+ /// grown by the gain from this slice.
+ void addSliceGain(const LoadedSlice &LS) {
+ // Each slice saves a truncate.
+ const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
+ if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
+ LS.Inst->getValueType(0)))
+ ++Truncates;
+ // If there is a shift amount, this slice gets rid of it.
+ if (LS.Shift)
+ ++Shift;
+ // If this slice can merge a cross register bank copy, account for it.
+ if (LS.canMergeExpensiveCrossRegisterBankCopy())
+ ++CrossRegisterBanksCopies;
+ }
+
+ Cost &operator+=(const Cost &RHS) {
+ Loads += RHS.Loads;
+ Truncates += RHS.Truncates;
+ CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
+ ZExts += RHS.ZExts;
+ Shift += RHS.Shift;
+ return *this;
+ }
+
+ bool operator==(const Cost &RHS) const {
+ return Loads == RHS.Loads && Truncates == RHS.Truncates &&
+ CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
+ ZExts == RHS.ZExts && Shift == RHS.Shift;
+ }
+
+ bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
+
+ bool operator<(const Cost &RHS) const {
+ // Assume cross register banks copies are as expensive as loads.
+ // FIXME: Do we want some more target hooks?
+ unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
+ unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
+ // Unless we are optimizing for code size, consider the
+ // expensive operation first.
+ if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
+ return ExpensiveOpsLHS < ExpensiveOpsRHS;
+ return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
+ (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
+ }
+
+ bool operator>(const Cost &RHS) const { return RHS < *this; }
+
+ bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
+
+ bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
+ };
+ // The last instruction that represents the slice. This should be a
+ // truncate instruction.
+ SDNode *Inst;
+ // The original load instruction.
+ LoadSDNode *Origin;
+ // The right shift amount in bits from the original load.
+ unsigned Shift;
+ // The DAG from which Origin comes.
+ // This is used to get some contextual information about legal types, etc.
+ SelectionDAG *DAG;
+
+ LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+ unsigned Shift = 0, SelectionDAG *DAG = nullptr)
+ : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
+
+ /// \brief Get the bits used in a chunk of bits \p BitWidth large.
+ /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
+ /// unused bits set to 0.
+ APInt getUsedBits() const {
+ // Reproduce the trunc(lshr) sequence:
+ // - Start from the truncated value.
+ // - Zero extend to the desired bit width.
+ // - Shift left.
+ assert(Origin && "No original load to compare against.");
+ unsigned BitWidth = Origin->getValueSizeInBits(0);
+ assert(Inst && "This slice is not bound to an instruction");
+ assert(Inst->getValueSizeInBits(0) <= BitWidth &&
+ "Extracted slice is bigger than the whole type!");
+ APInt UsedBits(Inst->getValueSizeInBits(0), 0);
+ UsedBits.setAllBits();
+ UsedBits = UsedBits.zext(BitWidth);
+ UsedBits <<= Shift;
+ return UsedBits;
+ }
+
+ /// \brief Get the size of the slice to be loaded in bytes.
+ unsigned getLoadedSize() const {
+ unsigned SliceSize = getUsedBits().countPopulation();
+ assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
+ return SliceSize / 8;
+ }
+
+ /// \brief Get the type that will be loaded for this slice.
+ /// Note: This may not be the final type for the slice.
+ EVT getLoadedType() const {
+ assert(DAG && "Missing context");
+ LLVMContext &Ctxt = *DAG->getContext();
+ return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
+ }
+
+ /// \brief Get the alignment of the load used for this slice.
+ unsigned getAlignment() const {
+ unsigned Alignment = Origin->getAlignment();
+ unsigned Offset = getOffsetFromBase();
+ if (Offset != 0)
+ Alignment = MinAlign(Alignment, Alignment + Offset);
+ return Alignment;
+ }
+
+ /// \brief Check if this slice can be rewritten with legal operations.
+ bool isLegal() const {
+ // An invalid slice is not legal.
+ if (!Origin || !Inst || !DAG)
+ return false;
+
+ // Offsets are for indexed loads only; we do not handle that.
+ if (!Origin->getOffset().isUndef())
+ return false;
+
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+
+ // Check that the type is legal.
+ EVT SliceType = getLoadedType();
+ if (!TLI.isTypeLegal(SliceType))
+ return false;
+
+ // Check that the load is legal for this type.
+ if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
+ return false;
+
+ // Check that the offset can be computed.
+ // 1. Check its type.
+ EVT PtrType = Origin->getBasePtr().getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ // 2. Check that it fits in the immediate.
+ if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
+ return false;
+
+ // 3. Check that the computation is legal.
+ if (!TLI.isOperationLegal(ISD::ADD, PtrType))
+ return false;
+
+ // Check that the zext is legal if it needs one.
+ EVT TruncateType = Inst->getValueType(0);
+ if (TruncateType != SliceType &&
+ !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
+ return false;
+
+ return true;
+ }
+
+ /// \brief Get the offset in bytes of this slice in the original chunk of
+ /// bits.
+ /// \pre DAG != nullptr.
+ uint64_t getOffsetFromBase() const {
+ assert(DAG && "Missing context.");
+ bool IsBigEndian = DAG->getDataLayout().isBigEndian();
+ assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
+ uint64_t Offset = Shift / 8;
+ unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
+ assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
+ "The size of the original loaded type is not a multiple of a"
+ " byte.");
+ // If Offset is bigger than TySizeInBytes, it means we are loading all
+ // zeros. This should have been optimized before in the process.
+ assert(TySizeInBytes > Offset &&
+ "Invalid shift amount for given loaded size");
+ if (IsBigEndian)
+ Offset = TySizeInBytes - Offset - getLoadedSize();
+ return Offset;
+ }
+
+ /// \brief Generate the sequence of instructions to load the slice
+ /// represented by this object and redirect the uses of this slice to
+ /// this new sequence of instructions.
+ /// \pre this->Inst && this->Origin are valid Instructions and this
+ /// object passed the legal check: LoadedSlice::isLegal returned true.
+ /// \return The last instruction of the sequence used to load the slice.
+ SDValue loadSlice() const {
+ assert(Inst && Origin && "Unable to replace a non-existing slice.");
+ const SDValue &OldBaseAddr = Origin->getBasePtr();
+ SDValue BaseAddr = OldBaseAddr;
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
+ int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
+ assert(Offset >= 0 && "Offset too big to fit in int64_t!");
+ if (Offset) {
+ // BaseAddr = BaseAddr + Offset.
+ EVT ArithType = BaseAddr.getValueType();
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
+ }
+
+ // Create the type of the loaded slice according to its size.
+ EVT SliceType = getLoadedType();
+
+ // Create the load for the slice.
+ SDValue LastInst =
+ DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
+ Origin->getPointerInfo().getWithOffset(Offset),
+ getAlignment(), Origin->getMemOperand()->getFlags());
+ // If the final type is not the same as the loaded type, this means that
+ // we have to pad with zero. Create a zero extend for that.
+ EVT FinalType = Inst->getValueType(0);
+ if (SliceType != FinalType)
+ LastInst =
+ DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
+ return LastInst;
+ }
+
+ /// \brief Check if this slice can be merged with an expensive cross register
+ /// bank copy. E.g.,
+ /// i = load i32
+ /// f = bitcast i32 i to float
+ bool canMergeExpensiveCrossRegisterBankCopy() const {
+ if (!Inst || !Inst->hasOneUse())
+ return false;
+ SDNode *Use = *Inst->use_begin();
+ if (Use->getOpcode() != ISD::BITCAST)
+ return false;
+ assert(DAG && "Missing context");
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ EVT ResVT = Use->getValueType(0);
+ const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+ const TargetRegisterClass *ArgRC =
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+ if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // At this point, we know that we perform a cross-register-bank copy.
+ // Check if it is expensive.
+ const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
+ // Assume bitcasts are cheap, unless both register classes do not
+ // explicitly share a common sub class.
+ if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
+ return false;
+
+ // Check if it will be merged with the load.
+ // 1. Check the alignment constraint.
+ unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
+ ResVT.getTypeForEVT(*DAG->getContext()));
+
+ if (RequiredAlignment > getAlignment())
+ return false;
+
+ // 2. Check that the load is a legal operation for that type.
+ if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
+ return false;
+
+ // 3. Check that we do not have a zext in the way.
+ if (Inst->getValueType(0) != getLoadedType())
+ return false;
+
+ return true;
+ }
+};
+}
+
+/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
+/// \p UsedBits looks like 0..0 1..1 0..0.
+static bool areUsedBitsDense(const APInt &UsedBits) {
+ // If all the bits are one, this is dense!
+ if (UsedBits.isAllOnesValue())
+ return true;
+
+ // Get rid of the unused bits on the right.
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
+ // Get rid of the unused bits on the left.
+ if (NarrowedUsedBits.countLeadingZeros())
+ NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
+ // Check that the chunk of bits is completely used.
+ return NarrowedUsedBits.isAllOnesValue();
+}
+
+/// \brief Check whether or not \p First and \p Second are next to each other
+/// in memory. This means that there is no hole between the bits loaded
+/// by \p First and the bits loaded by \p Second.
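+///
+/// For example (illustrative): slices of an i32 load with used bits
+/// 0x0000FFFF and 0xFFFF0000 combine to 0xFFFFFFFF, which is dense, so the
+/// slices are next to each other.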
+static bool areSlicesNextToEachOther(const LoadedSlice &First,
+ const LoadedSlice &Second) {
+ assert(First.Origin == Second.Origin && First.Origin &&
+ "Unable to match different memory origins.");
+ APInt UsedBits = First.getUsedBits();
+ assert((UsedBits & Second.getUsedBits()) == 0 &&
+ "Slices are not supposed to overlap.");
+ UsedBits |= Second.getUsedBits();
+ return areUsedBitsDense(UsedBits);
+}
+
+/// \brief Adjust the \p GlobalLSCost according to the target
+/// pairing capabilities and the layout of the slices.
+/// \pre \p GlobalLSCost should account for at least as many loads as
+/// there is in the slices in \p LoadedSlices.
+static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ LoadedSlice::Cost &GlobalLSCost) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ // If there are fewer than 2 elements, no pairing is possible.
+ if (NumberOfSlices < 2)
+ return;
+
+ // Sort the slices so that elements that are likely to be next to each
+ // other in memory are next to each other in the list.
+ std::sort(LoadedSlices.begin(), LoadedSlices.end(),
+ [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
+ const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
+ // First (resp. Second) is the first (resp. second) potential candidate
+ // to be placed in a paired load.
+ const LoadedSlice *First = nullptr;
+ const LoadedSlice *Second = nullptr;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
+ // Set the beginning of the pair.
+ First = Second) {
+
+ Second = &LoadedSlices[CurrSlice];
+
+ // If First is NULL, it means we start a new pair.
+ // Get to the next slice.
+ if (!First)
+ continue;
+
+ EVT LoadedType = First->getLoadedType();
+
+ // If the types of the slices are different, we cannot pair them.
+ if (LoadedType != Second->getLoadedType())
+ continue;
+
+ // Check if the target supplies paired loads for this type.
+ unsigned RequiredAlignment = 0;
+ if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
+ // Move to the next pair; this type is hopeless.
+ Second = nullptr;
+ continue;
+ }
+ // Check if we meet the alignment requirement.
+ if (RequiredAlignment > First->getAlignment())
+ continue;
+
+ // Check that both loads are next to each other in memory.
+ if (!areSlicesNextToEachOther(*First, *Second))
+ continue;
+
+ assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
+ --GlobalLSCost.Loads;
+ // Move to the next pair.
+ Second = nullptr;
+ }
+}
+
+/// \brief Check the profitability of all involved LoadedSlices.
+/// Currently, it is considered profitable if there are exactly two
+/// involved slices (1) which are (2) next to each other in memory, and
+/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
+///
+/// Note: The order of the elements in \p LoadedSlices may be modified, but not
+/// the elements themselves.
+///
+/// FIXME: When the cost model will be mature enough, we can relax
+/// constraints (1) and (2).
+static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
+ const APInt &UsedBits, bool ForCodeSize) {
+ unsigned NumberOfSlices = LoadedSlices.size();
+ if (StressLoadSlicing)
+ return NumberOfSlices > 1;
+
+ // Check (1).
+ if (NumberOfSlices != 2)
+ return false;
+
+ // Check (2).
+ if (!areUsedBitsDense(UsedBits))
+ return false;
+
+ // Check (3).
+ LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
+ // The original code has one big load.
+ OrigCost.Loads = 1;
+ for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
+ const LoadedSlice &LS = LoadedSlices[CurrSlice];
+ // Accumulate the cost of all the slices.
+ LoadedSlice::Cost SliceCost(LS, ForCodeSize);
+ GlobalSlicingCost += SliceCost;
+
+ // Account as cost in the original configuration the gain obtained
+ // with the current slices.
+ OrigCost.addSliceGain(LS);
+ }
+
+ // If the target supports paired load, adjust the cost accordingly.
+ adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
+ return OrigCost > GlobalSlicingCost;
+}
+
+/// \brief If the given load, \p N, is used only by trunc or trunc(lshr)
+/// operations, split it in the various pieces being extracted.
+///
+/// This sort of thing is introduced by SROA.
+/// This slicing takes care not to insert overlapping loads.
+/// \pre LI is a simple load (i.e., not an atomic or volatile load).
+bool DAGCombiner::SliceUpLoad(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+ !LD->getValueType(0).isInteger())
+ return false;
+
+ // Keep track of already used bits to detect overlapping values.
+ // In that case, we will just abort the transformation.
+ APInt UsedBits(LD->getValueSizeInBits(0), 0);
+
+ SmallVector<LoadedSlice, 4> LoadedSlices;
+
+ // Check if this load is used as several smaller chunks of bits.
+ // Basically, look for uses in trunc or trunc(lshr) and record a new chain
+ // of computation for each trunc.
+ for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
+ UI != UIEnd; ++UI) {
+ // Skip the uses of the chain.
+ if (UI.getUse().getResNo() != 0)
+ continue;
+
+ SDNode *User = *UI;
+ unsigned Shift = 0;
+
+ // Check if this is a trunc(lshr).
+ if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
+ isa<ConstantSDNode>(User->getOperand(1))) {
+ Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
+ User = *User->use_begin();
+ }
+
+ // At this point, User is a TRUNCATE iff we encountered trunc or
+ // trunc(lshr).
+ if (User->getOpcode() != ISD::TRUNCATE)
+ return false;
+
+ // The width of the type must be a power of 2 and greater than 8-bits.
+ // Otherwise the load cannot be represented in LLVM IR.
+ // Moreover, if we shifted with a non-8-bits multiple, the slice
+ // will be across several bytes. We do not support that.
+ unsigned Width = User->getValueSizeInBits(0);
+ if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
+ return false;
+
+ // Build the slice for this chain of computations.
+ LoadedSlice LS(User, LD, Shift, &DAG);
+ APInt CurrentUsedBits = LS.getUsedBits();
+
+ // Check if this slice overlaps with another.
+ if ((CurrentUsedBits & UsedBits) != 0)
+ return false;
+ // Update the bits used globally.
+ UsedBits |= CurrentUsedBits;
+
+ // Check if the new slice would be legal.
+ if (!LS.isLegal())
+ return false;
+
+ // Record the slice.
+ LoadedSlices.push_back(LS);
+ }
+
+ // Abort slicing if it does not seem to be profitable.
+ if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+ return false;
+
+ ++SlicedLoads;
+
+ // Rewrite each chain to use an independent load.
+ // By construction, each chain can be represented by a unique load.
+
+ // Prepare the argument for the new token factor for all the slices.
+ SmallVector<SDValue, 8> ArgChains;
+ for (SmallVectorImpl<LoadedSlice>::const_iterator
+ LSIt = LoadedSlices.begin(),
+ LSItEnd = LoadedSlices.end();
+ LSIt != LSItEnd; ++LSIt) {
+ SDValue SliceInst = LSIt->loadSlice();
+ CombineTo(LSIt->Inst, SliceInst, true);
+ if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+ SliceInst = SliceInst.getOperand(0);
+ assert(SliceInst->getOpcode() == ISD::LOAD &&
+ "It takes more than a zext to get to the loaded slice!!");
+ ArgChains.push_back(SliceInst.getValue(1));
+ }
+
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+ ArgChains);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ return true;
+}
+
+/// Check to see if V is (and (load ptr), imm), where the AND clears out
+/// specific bytes of the loaded value. If so, return the number of bytes
+/// being masked out and the byte shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+ std::pair<unsigned, unsigned> Result(0, 0);
+
+ // Check for the structure we're looking for.
+ if (V->getOpcode() != ISD::AND ||
+ !isa<ConstantSDNode>(V->getOperand(1)) ||
+ !ISD::isNormalLoad(V->getOperand(0).getNode()))
+ return Result;
+
+ // Check the chain and pointer.
+ LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+ if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
+
+ // The store should be chained directly to the load or be an operand of a
+ // tokenfactor.
+ if (LD == Chain.getNode())
+ ; // ok.
+ else if (Chain->getOpcode() != ISD::TokenFactor)
+ return Result; // Fail.
+ else {
+ bool isOk = false;
+ for (const SDValue &ChainOp : Chain->op_values())
+ if (ChainOp.getNode() == LD) {
+ isOk = true;
+ break;
+ }
+ if (!isOk) return Result;
+ }
+
+ // This only handles simple types.
+ if (V.getValueType() != MVT::i16 &&
+ V.getValueType() != MVT::i32 &&
+ V.getValueType() != MVT::i64)
+ return Result;
+
+ // Check the constant mask. Invert it so that the bits being masked out are
+ // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
+ // follow the sign bit for uniformity.
+ uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+ unsigned NotMaskLZ = countLeadingZeros(NotMask);
+ if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
+ unsigned NotMaskTZ = countTrailingZeros(NotMask);
+ if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
+ if (NotMaskLZ == 64) return Result; // All zero mask.
+
+ // See if we have a continuous run of bits. If so, we have 0*1+0*
+ if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
+ return Result;
+
+ // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+ if (V.getValueType() != MVT::i64 && NotMaskLZ)
+ NotMaskLZ -= 64-V.getValueSizeInBits();
+
+ unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+ switch (MaskedBytes) {
+ case 1:
+ case 2:
+ case 4: break;
+ default: return Result; // All one mask, or 5-byte mask.
+ }
+
+ // Verify that the first bit starts at a multiple of mask so that the access
+ // is aligned the same as the access width.
+ if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
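+ // For example (illustrative): an i32 value masked with 0xFFFF00FF gives
+ // NotMask == 0xFF00, so exactly one byte is cleared at a byte shift of 1,
+ // and we return (1, 1).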
+ Result.first = MaskedBytes;
+ Result.second = NotMaskTZ/8;
+ return Result;
+}
+
+
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+ SDValue IVal, StoreSDNode *St,
+ DAGCombiner *DC) {
+ unsigned NumBytes = MaskInfo.first;
+ unsigned ByteShift = MaskInfo.second;
+ SelectionDAG &DAG = DC->getDAG();
+
+ // Check to see if IVal is all zeros in the part being masked in by the 'or'
+ // that uses this. If not, this is not a replacement.
+ APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+ ByteShift*8, (ByteShift+NumBytes)*8);
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
+
+ // Check that it is legal on the target to do this. It is legal if the new
+ // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+ // legalization.
+ MVT VT = MVT::getIntegerVT(NumBytes*8);
+ if (!DC->isTypeLegal(VT))
+ return nullptr;
+
+ // Okay, we can do this! Replace the 'St' store with a store of IVal that is
+ // shifted by ByteShift and truncated down to NumBytes.
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
+ DC->getShiftAmountTy(IVal.getValueType())));
+ }
+
+ // Figure out the offset for the store and the alignment of the access.
+ unsigned StOffset;
+ unsigned NewAlign = St->getAlignment();
+
+ if (DAG.getDataLayout().isLittleEndian())
+ StOffset = ByteShift;
+ else
+ StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+ SDValue Ptr = St->getBasePtr();
+ if (StOffset) {
+ SDLoc DL(IVal);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
+ NewAlign = MinAlign(NewAlign, StOffset);
+ }
+
+ // Truncate down to the new size.
+ IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
+
+ ++OpsNarrowed;
+ return DAG
+ .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset), NewAlign)
+ .getNode();
+}
+
+
+/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
+/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
+/// narrowing the load and store if it would end up being a win for performance
+/// or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ if (ST->isVolatile())
+ return SDValue();
+
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ EVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue();
+
+ unsigned Opc = Value.getOpcode();
+
+ // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+ // is a byte mask indicating a consecutive number of bytes, check to see if
+ // Y is known to provide just those bytes. If so, we try to replace the
+ // load + replace + store sequence with a single (narrower) store, which makes
+ // the load dead.
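+ //
+ // For example (little-endian, illustrative only):
+ // store (or (and (load p), 0xFFFF00FF), (and Y, 0x0000FF00)), p
+ // only changes byte 1 of the loaded value, so it can be narrowed to an
+ // i8 store of that byte at address p + 1, making the load dead.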
+ if (Opc == ISD::OR) {
+ std::pair<unsigned, unsigned> MaskedLoad;
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(1), ST,this))
+ return SDValue(NewST, 0);
+
+ // Or is commutative, so try swapping X and Y.
+ MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+ if (MaskedLoad.first)
+ if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ Value.getOperand(0), ST,this))
+ return SDValue(NewST, 0);
+ }
+
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
+ return SDValue();
+
+ // Find the type to narrow it the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ if (Imm == 0 || Imm.isAllOnesValue())
+ return SDValue();
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ // The narrowing should be profitable, the load/store operation should be
+ // legal (or custom) and the store size should be equal to the NewVT width.
+ while (NewBW < BitWidth &&
+ (NewVT.getStoreSizeInBits() != NewBW ||
+ !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
+ !TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+ }
+ if (NewBW >= BitWidth)
+ return SDValue();
+
+ // If the lsb that changed does not start at a type-bitwidth boundary,
+ // start at the previous one.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
+ std::min(BitWidth, ShAmt + NewBW));
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (DAG.getDataLayout().isBigEndian())
+ PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+ if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
+ return SDValue();
+
+ SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, SDLoc(LD),
+ Ptr.getValueType()));
+ SDValue NewLD =
+ DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
+ LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
+ SDValue NewST =
+ DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
+ ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
+
+ AddToWorklist(NewPtr.getNode());
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewVal.getNode());
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue();
+}
+
+/// For a given floating point load / store pair, if the load value isn't used
+/// by any other operations, then consider transforming the pair to integer
+/// load / store operations if the target deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD =
+ DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(), LDAlign);
+
+ SDValue NewST =
+ DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(), STAlign);
+
+ AddToWorklist(NewLD.getNode());
+ AddToWorklist(NewST.getNode());
+ WorklistRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
+namespace {
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
+ }
+
+ /// Parses tree in Ptr for base, index, offset addresses.
+ static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
+ bool IsIndexSignExt = false;
+
+ // Split up a folded GlobalAddress+Offset into its component parts.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
+ if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
+ return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
+ SDLoc(GA),
+ GA->getValueType(0),
+ /*Offset=*/0,
+ /*isTargetGA=*/false,
+ GA->getTargetFlags()),
+ SDValue(),
+ GA->getOffset(),
+ IsIndexSignExt);
+ }
+
+ // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+ // instruction, then it could be just the BASE or something else we don't
+ // know how to handle. Just use Ptr as BASE and give up.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
+} // namespace
+
+// This is a helper function for visitMUL to check the profitability
+// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+// MulNode is the original multiply, AddNode is (add x, c1),
+// and ConstNode is c2.
+//
+// If the (add x, c1) has multiple uses, we could increase
+// the number of adds if we make this transformation.
+// It would only be worth doing this if we can remove a
+// multiply in the process. Check for that here.
+// To illustrate:
+// (A + c1) * c3
+// (A + c2) * c3
+// We're checking for cases where we have common "c3 * A" expressions.
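+// For example, with c1 = 1, c2 = 2 and c3 = 4, both
+//   (A + 1) * 4  ->  (A * 4) + 4
+//   (A + 2) * 4  ->  (A * 4) + 8
+// end up sharing the single multiply (A * 4).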
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
+ SDValue &AddNode,
+ SDValue &ConstNode) {
+ APInt Val;
+
+ // If the add only has one use, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse())
+ return true;
+
+ // Walk all the users of the constant with which we're multiplying.
+ for (SDNode *Use : ConstNode->uses()) {
+
+ if (Use == MulNode) // This use is the one we're on right now. Skip it.
+ continue;
+
+ if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
+ SDNode *OtherOp;
+ SDNode *MulVar = AddNode.getOperand(0).getNode();
+
+ // OtherOp is what we're multiplying against the constant.
+ if (Use->getOperand(0) == ConstNode)
+ OtherOp = Use->getOperand(1).getNode();
+ else
+ OtherOp = Use->getOperand(0).getNode();
+
+ // Check to see if multiply is with the same operand of our "add".
+ //
+ // ConstNode = CONST
+ // Use = ConstNode * A <-- visiting Use. OtherOp is A.
+ // ...
+ // AddNode = (A + c1) <-- MulVar is A.
+ // = AddNode * ConstNode <-- current visiting instruction.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (ConstNode * A) that we can save.
+ if (OtherOp == MulVar)
+ return true;
+
+ // Now check to see if a future expansion will give us a common
+ // multiply.
+ //
+ // ConstNode = CONST
+ // AddNode = (A + c1)
+ // ... = AddNode * ConstNode <-- current visiting instruction.
+ // ...
+ // OtherOp = (A + c2)
+ // Use = OtherOp * ConstNode <-- visiting Use.
+ //
+ // If we make this transformation, we will have a common
+ // multiply (CONST * A) after we also do the same transformation
+ // to the "Use" instruction.
+ if (OtherOp->getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
+ OtherOp->getOperand(0).getNode() == MulVar)
+ return true;
+ }
+ }
+
+ // Didn't find a case where this would be profitable.
+ return false;
+}
+
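+// Build a BUILD_VECTOR of type Ty from the values of the first
+// Ty.getVectorNumElements() stores in Stores, collecting their chains into
+// Chains along the way.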
+SDValue DAGCombiner::getMergedConstantVectorStore(
+ SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
+ SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
+ SmallVector<SDValue, 8> BuildVector;
+
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+ Chains.push_back(St->getChain());
+ BuildVector.push_back(St->getValue());
+ }
+
+ return DAG.getBuildVector(Ty, SL, BuildVector);
+}
+
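+// Merge the first NumStores stores in StoreNodes into a single wide store
+// whose value is either one integer constant, a BUILD_VECTOR of the
+// constants, or a vector assembled from the extracted elements, depending on
+// IsConstantSrc and UseVector.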
+bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
+ unsigned NumStores, bool IsConstantSrc, bool UseVector) {
+ // Make sure we have something to merge.
+ if (NumStores < 2)
+ return false;
+
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned LatestNodeUsed = 0;
+
+ for (unsigned i=0; i < NumStores; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
+ }
+
+ SmallVector<SDValue, 8> Chains;
+
+ // The latest Node in the DAG.
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+ SDLoc DL(StoreNodes[0].MemNode);
+
+ SDValue StoredVal;
+ if (UseVector) {
+ bool IsVec = MemVT.isVector();
+ unsigned Elts = NumStores;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ // Get the type for the merged vector store.
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
+ if (IsConstantSrc) {
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
+ } else {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumStores; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue Val = St->getValue();
+ // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
+ if (Val.getValueType() != MemVT)
+ return false;
+ Ops.push_back(Val);
+ Chains.push_back(St->getChain());
+ }
+
+ // Build the extracted vector elements back into a vector.
+ StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+ DL, Ty, Ops);
+ }
+ } else {
+ // We should always use a vector store when merging extracted vector
+ // elements, so this path implies a store of constants.
+ assert(IsConstantSrc && "Merged vector elements should use vector store");
+
+ unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
+ APInt StoreInt(SizeInBits, 0);
+
+ // Construct a single integer constant which is made of the smaller
+ // constant inputs.
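+ // For example, on a little-endian target, merging i16 stores of 0x1111
+ // (offset 0) and 0x2222 (offset 2) yields the single i32 constant
+ // 0x22221111.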
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ for (unsigned i = 0; i < NumStores; ++i) {
+ unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+ Chains.push_back(St->getChain());
+
+ SDValue Val = St->getValue();
+ StoreInt <<= ElementSizeBytes * 8;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+ StoreInt |= C->getAPIntValue().zext(SizeInBits);
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
+ } else {
+ llvm_unreachable("Invalid constant element type");
+ }
+ }
+
+ // Create the new Load and Store operations.
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
+ }
+
+ assert(!Chains.empty());
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
+ FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlignment());
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ if (UseAA) {
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ } else {
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (StoreNodes[i].MemNode == LatestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ // ReplaceAllUsesWith will replace all uses that existed when it was
+ // called, but graph optimizations may cause new ones to appear. For
+ // example, the case in pr14333 looks like
+ //
+ // St's chain -> St -> another store -> X
+ //
+ // And the only difference from St to the other store is the chain.
+ // When we change its chain to be St's chain they become identical,
+ // get CSEed and the net result is that X is now a use of St.
+ // Since we know that St is redundant, just iterate.
+ while (!St->use_empty())
+ DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+ }
+
+ return true;
+}
+
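+// Collect into StoreNodes the stores that share St's base pointer and index
+// and are therefore candidates for merging. In the chain-walking (non-AA)
+// mode, loads seen along the way are saved in AliasLoadNodes so the caller
+// can check that they do not interfere.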
+void DAGCombiner::getStoreMergeAndAliasCandidates(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.isUndef())
+ return;
+
+ // Walk up the chain and look for nodes with offsets from the same
+ // base pointer. Stop when reaching an instruction with a different kind
+ // or instruction which has a different base pointer.
+ EVT MemVT = St->getMemoryVT();
+ unsigned Seq = 0;
+ StoreSDNode *Index = St;
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+
+ if (UseAA) {
+ // Look at other users of the same chain. Stores on the same chain do not
+ // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
+ // to be on the same chain, so don't bother looking at adjacent chains.
+
+ SDValue Chain = St->getChain();
+ for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ if (I.getOperandNo() != 0)
+ continue;
+
+ if (OtherST->isVolatile() || OtherST->isIndexed())
+ continue;
+
+ if (OtherST->getMemoryVT() != MemVT)
+ continue;
+
+ BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
+
+ if (Ptr.equalBaseIndex(BasePtr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
+ }
+ }
+
+ return;
+ }
+
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // The memory operands must not be volatile.
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // No truncation.
+ if (Index->isTruncatingStore())
+ break;
+
+ // The stored memory type must be the same.
+ if (Index->getMemoryVT() != MemVT)
+ break;
+
+ // We do not allow under-aligned stores in order to prevent
+ // overwriting stores. NOTE: this is a bad hack. Alignment SHOULD
+ // be irrelevant here; what MATTERS is that we not move memory
+ // operations that potentially overlap past each other.
+ if (Index->getAlignment() < MemVT.getStoreSize())
+ break;
+
+ // We found a potential memory operand to merge.
+ StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store then move up and continue the scan with the next
+ // memory operand. If the next operand is a load save it and use alias
+ // information to check if it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (1) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ if (Ldn->isVolatile()) {
+ Index = nullptr;
+ break;
+ }
+
+ // Save the load node for later. Continue the scan.
+ AliasLoadNodes.push_back(Ldn);
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = nullptr;
+ break;
+ }
+ }
+ }
+}
+
+// We need to check that merging these stores does not cause a loop
+// in the DAG. Any store candidate may depend on another candidate
+// indirectly through its operand (we already consider dependencies
+// through the chain). Check in parallel by searching up from
+// non-chain operands of candidates.
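+// For example (schematically):
+//   t0: store A, p
+//   t1: load q (chain t0)
+//   t2: store (add t1, 1), p+4
+// Merging t0 and t2 would make the merged store a user of t1 while t1
+// already depends on t0 through its chain, creating a cycle.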
+bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
+ SmallVectorImpl<MemOpLink> &StoreNodes) {
+ SmallPtrSet<const SDNode *, 16> Visited;
+ SmallVector<const SDNode *, 8> Worklist;
+ // Search the operands of the store candidates.
+ for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ SDNode *n = StoreNodes[i].MemNode;
+ // Potential loops may happen only through non-chain operands
+ for (unsigned j = 1; j < n->getNumOperands(); ++j)
+ Worklist.push_back(n->getOperand(j).getNode());
+ }
+ // Search through the DAG. We can stop early if we find a store node.
+ for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
+ return false;
+ }
+ return true;
+}
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ return false;
+
+ if (!MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants, loads, or extracted vector elements.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
+ return false;
+
+ // Don't merge vectors into wider vectors if the source data comes from loads.
+ // TODO: This restriction can be lifted by using logic similar to the
+ // ExtractVecSrc case.
+ if (MemVT.isVector() && IsLoadSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 0);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
+ SmallVector<MemOpLink, 8> StoreNodes;
+
+ getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+
+ // Check if there is anything to merge.
+ if (StoreNodes.size() < 2)
+ return false;
+
+ // Only do the dependence check in the AA case.
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+ if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
+ return false;
+
+ // Sort the memory operands according to their distance from the
+ // base pointer. As a secondary criterion: make sure stores coming
+ // later in the code come first in the list. This is important for
+ // the non-UseAA case, because we're merging stores into the FINAL
+ // store along a chain which potentially contains aliasing stores.
+ // Thus, if there are multiple stores to the same address, the last
+ // one can be considered for merging but not the others.
+ std::sort(StoreNodes.begin(), StoreNodes.end(),
+ [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase ||
+ (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+ LHS.SequenceNum < RHS.SequenceNum);
+ });
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // store memory address.
+ unsigned LastConsecutiveStore = 0;
+ int64_t StartAddress = StoreNodes[0].OffsetFromBase;
+ for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ if (i > 0) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ }
+
+ // Check if this store interferes with any of the loads that we found.
+ // If we find a load that aliases with this store, stop the sequence.
+ if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
+ [&](LSBaseSDNode* Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
+ break;
+
+ // Mark this node as useful.
+ LastConsecutiveStore = i;
+ }
+
+ // The node with the lowest store address.
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ LLVMContext &Context = *DAG.getContext();
+ const DataLayout &DL = DAG.getDataLayout();
+
+ // Store the constants into memory as one consecutive store.
+ if (IsConstantSrc) {
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = St->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non-constant.
+ break;
+ }
+
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast) {
+ LastLegalType = i+1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastLegalType = i + 1;
+ }
+ }
+
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
+ FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ LastLegalVectorType = i + 1;
+ }
+ }
+
+ // Check if we found a legal integer type to store.
+ if (LastLegalType == 0 && LastLegalVectorType == 0)
+ return false;
+
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ }
+
+ // When extracting multiple vector elements, try to store them
+ // in one vector store rather than a sequence of scalar stores.
+ if (IsExtractVecSrc) {
+ unsigned NumStoresToMerge = 0;
+ bool IsVec = MemVT.isVector();
+ for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ unsigned StoreValOpcode = St->getValue().getOpcode();
+ // This restriction could be loosened.
+ // Bail out if any stored values are not elements extracted from a vector.
+ // It should be possible to handle mixed sources, but load sources need
+ // more careful handling (see the block of code below that handles
+ // consecutive loads).
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
+ StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ // Find a legal type for the vector store.
+ unsigned Elts = i + 1;
+ if (IsVec) {
+ // When merging vector stores, get the total number of elements.
+ Elts *= MemVT.getVectorNumElements();
+ }
+ EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ bool IsFast;
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) && IsFast)
+ NumStoresToMerge = i + 1;
+ }
+
+ return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
+ false, true);
+ }
+
+ // Below we handle the case of multiple consecutive stores that
+ // come from multiple consecutive loads. We merge them into a single
+ // wide load and a single wide store.
+
+ // Look for load nodes which are used by the stored values.
+ SmallVector<MemOpLink, 8> LoadNodes;
+
+ // Find acceptable loads. Loads need to have the same chain (token factor),
+ // must not be extending loads, volatile, or indexed, and they must be
+ // consecutive.
+ BaseIndexOffset LdBasePtr;
+ for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ if (!Ld) break;
+
+ // Loads must only have one use.
+ if (!Ld->hasNUsesOfValue(1, 0))
+ break;
+
+ // The memory operands must not be volatile.
+ if (Ld->isVolatile() || Ld->isIndexed())
+ break;
+
+ // We do not accept ext loads.
+ if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ break;
+
+ // The stored memory type must be the same.
+ if (Ld->getMemoryVT() != MemVT)
+ break;
+
+ BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ // If this is not the first ptr that we check.
+ if (LdBasePtr.Base.getNode()) {
+ // The base ptr must be the same.
+ if (!LdPtr.equalBaseIndex(LdBasePtr))
+ break;
+ } else {
+ // Check that all other base pointers are the same as this one.
+ LdBasePtr = LdPtr;
+ }
+
+ // We found a potential memory operand to merge.
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+ }
+
+ if (LoadNodes.size() < 2)
+ return false;
+
+ // If we have load/store pair instructions and we only have two values,
+ // don't bother.
+ unsigned RequiredAlignment;
+ if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+ St->getAlignment() >= RequiredAlignment)
+ return false;
+
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
+ // Scan the memory operations on the chain and find the first non-consecutive
+ // load memory address. These variables hold the index in the store node
+ // array.
+ unsigned LastConsecutiveLoad = 0;
+ // These variables refer to sizes, not indices into the array.
+ unsigned LastLegalVectorType = 0;
+ unsigned LastLegalIntegerType = 0;
+ StartAddress = LoadNodes[0].OffsetFromBase;
+ SDValue FirstChain = FirstLoad->getChain();
+ for (unsigned i = 1; i < LoadNodes.size(); ++i) {
+ // All loads must share the same chain.
+ if (LoadNodes[i].MemNode->getChain() != FirstChain)
+ break;
+
+ int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+ break;
+ LastConsecutiveLoad = i;
+ // Find a legal type for the vector store.
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
+ bool IsFastSt, IsFastLd;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd) {
+ LastLegalVectorType = i + 1;
+ }
+
+ // Find a legal type for the integer store.
+ unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) && IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
+ FirstLoadAlign, &IsFastLd) && IsFastLd)
+ LastLegalIntegerType = i + 1;
+ // Or check whether a truncstore and extload is legal.
+ else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(Context, StoreTy);
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ IsFastSt &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
+ IsFastLd)
+ LastLegalIntegerType = i+1;
+ }
+ }
+
+ // Only use vector types if the vector type is larger than the integer type.
+ // If they are the same, use integers.
+ bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
+ unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
+
+ // We add +1 here because the LastXXX variables refer to an array index
+ // while NumElem refers to the number of elements to merge.
+ unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ NumElem = std::min(LastLegalType, NumElem);
+
+ if (NumElem < 2)
+ return false;
+
+ // Collect the chains from all merged stores.
+ SmallVector<SDValue, 8> MergeStoreChains;
+ MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
+
+ // The latest Node in the DAG.
+ unsigned LatestNodeUsed = 0;
+ for (unsigned i=1; i<NumElem; ++i) {
+ // Find a chain for the new wide-store operand. Notice that some
+ // of the store nodes that we found may not be selected for inclusion
+ // in the wide store. The chain we use needs to be the chain of the
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
+
+ MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
+ }
+
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+
+ // Find if it is better to use vectors or integers to load and store
+ // to memory.
+ EVT JointMemOpVT;
+ if (UseVectorTy) {
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
+ } else {
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+ }
+
+ SDLoc LoadDL(LoadNodes[0].MemNode);
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ // The merged loads are required to have the same incoming chain, so
+ // using the first's chain is acceptable.
+ SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+ FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoadAlign);
+
+ SDValue NewStoreChain =
+ DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+
+ SDValue NewStore =
+ DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstStoreAlign);
+
+ // Transfer chain users from old loads to the new load.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+ SDValue(NewLoad.getNode(), 1));
+ }
+
+ if (UseAA) {
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumElem; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
+ } else {
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
+ // Erase all other stores.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == LatestOp)
+ continue;
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
+ deleteAndRecombine(St);
+ }
+ }
+
+ return true;
+}
+
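+// Rebuild ST on top of BetterChain and tie the old and new stores together
+// with a TokenFactor so both chains stay reachable until dead nodes are
+// cleaned up.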
+SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
+ SDLoc SL(ST);
+ SDValue ReplStore;
+
+ // Replace the chain to avoid dependency.
+ if (ST->isTruncatingStore()) {
+ ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
+ ST->getBasePtr(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ } else {
+ ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
+ ST->getMemOperand());
+ }
+
+ // Create token to keep both nodes around.
+ SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
+ MVT::Other, ST->getChain(), ReplStore);
+
+ // Make sure the new and old chains are cleaned up.
+ AddToWorklist(Token.getNode());
+
+ // Don't add users to work list.
+ return CombineTo(ST, Token, false);
+}
+
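+// Replace a store of an f32/f64 constant with a store of its integer bit
+// pattern, or, for f64 when only i32 stores are practical, with two i32
+// stores. The transform is skipped when the required integer store is not
+// legal or when it would increase the operation count for a volatile store.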
+SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
+ SDValue Value = ST->getValue();
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ SDLoc DL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
+
+ // NOTE: If the original store is volatile, this transform must not increase
+ // the number of stores. For example, on x86-32 an f64 can be stored in one
+ // processor operation but an i64 (which is not legal) requires two. So the
+ // transform should not be done in this case.
+
+ SDValue Tmp;
+ switch (CFP->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unknown FP type");
+ case MVT::f16: // We don't do this for these yet.
+ case MVT::f80:
+ case MVT::f128:
+ case MVT::ppcf128:
+ return SDValue();
+ case MVT::f32:
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
+ return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
+ }
+
+ return SDValue();
+ case MVT::f64:
+ if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+ !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+ Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ getZExtValue(), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, DL, Tmp,
+ Ptr, ST->getMemOperand());
+ }
+
+ if (!ST->isVolatile() &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+ // Many FP stores are not made apparent until after legalize, e.g. for
+ // argument passing. Since this is so common, custom legalize the
+ // 64-bit integer store into two 32-bit stores.
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
+ Alignment = MinAlign(Alignment, 4U);
+ SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ Alignment, MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ St0, St1);
+ }
+
+ return SDValue();
+ }
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is a store of a bit convert, store the input value if the
+ // resultant store does not need a higher alignment than the original.
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+ ST->isUnindexed()) {
+ EVT SVT = Value.getOperand(0).getValueType();
+ if (((!LegalOperations && !ST->isVolatile()) ||
+ TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
+ unsigned OrigAlign = ST->getAlignment();
+ bool Fast = false;
+ if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
+ ST->getAddressSpace(), OrigAlign, &Fast) &&
+ Fast) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getPointerInfo(), OrigAlign,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ }
+ }
+ }
+
+ // Turn 'store undef, Ptr' -> nothing.
+ if (Value.isUndef() && ST->isUnindexed())
+ return Chain;
+
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment()) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
+ ST->getMemoryVT(), Align,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ if (NewStore.getNode() != N)
+ return CombineTo(ST, NewStore, true);
+ }
+ }
+ }
+
+ // Try transforming a pair of floating point load / store ops into integer
+ // load / store ops.
+ if (SDValue NewST = TransformFPLoadStorePair(N))
+ return NewST;
+
+ bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
+ : DAG.getSubtarget().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA && ST->isUnindexed()) {
+ // FIXME: We should do this even without AA enabled. AA will just allow
+ // FindBetterChain to work in more situations. The problem with this is that
+ // any combine that expects memory operations to be on consecutive chains
+ // first needs to be updated to look for users of the same chain.
+
+ // Walk up chain skipping non-aliasing memory nodes, on this store and any
+ // adjacent stores.
+ if (findBetterNeighborChains(ST)) {
+ // replaceStoreChain uses CombineTo, which handled all of the worklist
+ // manipulation. Return the original node to not do anything else.
+ return SDValue(ST, 0);
+ }
+ Chain = ST->getChain();
+ }
+
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
+ // FIXME: is there such a thing as a truncating indexed store?
+ if (ST->isTruncatingStore() && ST->isUnindexed() &&
+ Value.getValueType().isInteger()) {
+ // See if we can simplify the input to this truncstore with knowledge that
+ // only the low bits are being used. For example:
+ // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
+ SDValue Shorter =
+ GetDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
+ AddToWorklist(Value.getNode());
+ if (Shorter.getNode())
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
+
+ // Otherwise, see if we can simplify the operation with
+ // SimplifyDemandedBits, which only works if the value has a single use.
+ if (SimplifyDemandedBits(Value,
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
+ return SDValue(N, 0);
+ }
+
+ // If this is a load followed by a store to the same location, then the store
+ // is dead/noop.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+ ST->isUnindexed() && !ST->isVolatile() &&
+ // There can't be any side effects between the load and store, such as
+ // a call or store.
+ Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+ if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
+ ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
+ ST1->isUnindexed() && !ST1->isVolatile()) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+ }
+
+ // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+ // truncating store. We can do this even if this is already a truncstore.
+ if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+ && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+ ST->getMemoryVT())) {
+ return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
+ Ptr, ST->getMemoryVT(), ST->getMemOperand());
+ }
+
+ // Only perform this optimization before the types are legal, because we
+ // don't want to perform this optimization on every DAGCombine invocation.
+ if (!LegalTypes) {
+ bool EverChanged = false;
+
+ do {
+ // There can be multiple store sequences on the same chain.
+ // Keep trying to merge store sequences until we are unable to do so
+ // or until we merge the last store on the chain.
+ bool Changed = MergeConsecutiveStores(ST);
+ EverChanged |= Changed;
+ if (!Changed) break;
+ } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+ if (EverChanged)
+ return SDValue(N, 0);
+ }
+
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+ //
+ // Make sure to do this only after attempting to merge stores in order to
+ // avoid changing the types of some subset of stores due to visit order,
+ // preventing their merging.
+ if (isa<ConstantFPSDNode>(Value)) {
+ if (SDValue NewSt = replaceStoreOfFPConstant(ST))
+ return NewSt;
+ }
+
+ return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InVec = N->getOperand(0);
+ SDValue InVal = N->getOperand(1);
+ SDValue EltNo = N->getOperand(2);
+ SDLoc dl(N);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.isUndef())
+ return InVec;
+
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
+ // Check that we know which element is being inserted
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+ unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt =
+ cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+ // Do not combine these two vectors if the output vector will not replace
+ // the input vector.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
+ Ops.append(InVec.getNode()->op_begin(),
+ InVec.getNode()->op_end());
+ } else if (InVec.isUndef()) {
+ unsigned NElts = VT.getVectorNumElements();
+ Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
+ } else {
+ return SDValue();
+ }
+
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ if (InVal.getValueType() != OpVT)
+ InVal = OpVT.bitsGT(InVal.getValueType()) ?
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ Ops[Elt] = InVal;
+ }
+
+ // Return the new vector
+ return DAG.getBuildVector(VT, dl, Ops);
+}
+
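+// Replace (extract_vector_elt (load VecVT), EltNo) with a narrow scalar load
+// of just the requested element. Gives up if the element load would need more
+// alignment than the original load provides or if scalar loads of the element
+// type are not legal or custom.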
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+ assert(!OriginalLoad->isVolatile());
+
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+ unsigned Align = OriginalLoad->getAlignment();
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ return SDValue();
+
+ Align = NewAlign;
+
+ SDValue NewPtr = OriginalLoad->getBasePtr();
+ SDValue Offset;
+ EVT PtrType = NewPtr.getValueType();
+ MachinePointerInfo MPI;
+ SDLoc DL(EVE);
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ Offset = DAG.getConstant(PtrOff, DL, PtrType);
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ } else {
+ Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
+ Offset = DAG.getNode(
+ ISD::MUL, DL, PtrType, Offset,
+ DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
+ MPI = OriginalLoad->getPointerInfo();
+ }
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
+ VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
+ OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
+ Align, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
+ MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ Chain = Load.getValue(1);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ else
+ Load = DAG.getBitcast(ResultVT, Load);
+ }
+ WorklistRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorklist(Load.getNode());
+ AddUsersToWorklist(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorklist(EVE);
+ ++OpsNarrowed;
+ return SDValue(EVE, 0);
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+ // (vextract (scalar_to_vector val, 0) -> val
+ SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // Check if the result type doesn't match the inserted element type. A
+ // SCALAR_TO_VECTOR may truncate the inserted element and the
+ // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ SDValue InOp = InVec.getOperand(0);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
+ }
+ return InOp;
+ }
+
+ SDValue EltNo = N->getOperand(1);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+
+ // extract_vector_elt (build_vector x, y), 1 -> y
+ if (ConstEltNo &&
+ InVec.getOpcode() == ISD::BUILD_VECTOR &&
+ TLI.isTypeLegal(VT) &&
+ (InVec.hasOneUse() ||
+ TLI.aggressivelyPreferBuildVectorSources(VT))) {
+ SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
+ EVT InEltVT = Elt.getValueType();
+
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (NVT == InEltVT)
+ return Elt;
+
+ // TODO: It may be useful to truncate if the truncation is free and the
+ // build_vector implicitly converts.
+ }
+
+ // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
+ if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
+ ConstEltNo->isNullValue() && VT.isInteger()) {
+ SDValue BCSrc = InVec.getOperand(0);
+ if (BCSrc.getValueType().isScalarInteger())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
+ }
+
+ // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
+ //
+ // This only really matters if the index is non-constant since other combines
+ // on the constant elements already work.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ EltNo == InVec.getOperand(2)) {
+ SDValue Elt = InVec.getOperand(1);
+ return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
+ }
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD
+ // patterns. For example, on AVX there is no pattern for extracting elements
+ // from a wide vector without using extract_subvector. However, if we can
+ // find an underlying
+ // scalar value, then we can always use that.
+ if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ SDValue SVInVec;
+ if (OrigElt < NumElem) {
+ SVInVec = InVec->getOperand(0);
+ } else {
+ SVInVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue InOp = SVInVec.getOperand(OrigElt);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+ }
+
+ return InOp;
+ }
+
+ // FIXME: We should handle recursing on other vector shuffles and
+ // scalar_to_vector here as well.
+
+ if (!LegalOperations) {
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
+ DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
+ }
+ }
+
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+ if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+ ISD::isNormalLoad(InVec.getNode()) &&
+ !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
+ SDValue Index = N->getOperand(1);
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
+ if (!OrigLoad->isVolatile()) {
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+ OrigLoad);
+ }
+ }
+ }
+
+ // Perform only after legalization to ensure build_vector / vector_shuffle
+ // optimizations have already been done.
+ if (!LegalOperations) return SDValue();
+
+ // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+ // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+
+ if (ConstEltNo) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ LoadSDNode *LN0 = nullptr;
+ const ShuffleVectorSDNode *SVN = nullptr;
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ InVec.getOperand(0).getValueType() == ExtVT &&
+ ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+ } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+ // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+ // =>
+ // (load $addr+1*size)
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ // If the bit convert changed the number of elements, it is unsafe
+ // to examine the mask.
+ if (BCNumEltsChanged)
+ return SDValue();
+
+ // Select the input vector, guarding against an out-of-range extract index.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
+ InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ InVec = InVec.getOperand(0);
+ }
+ if (ISD::isNormalLoad(InVec.getNode())) {
+ LN0 = cast<LoadSDNode>(InVec);
+ Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
+ }
+ }
+
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ return SDValue();
+
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LVT);
+
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
+ }
+
+ return SDValue();
+}
+
+// Simplify (build_vec (ext x)) to (bitcast (build_vec x))
+SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
+ return SDValue();
+
+ unsigned NumInScalars = N->getNumOperands();
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
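+ // For example, on a little-endian target
+ //   (v4i32 build_vector (zext i16 a), (zext i16 b), ...)
+ // can be rebuilt as
+ //   (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, ...))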
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.isUndef()) continue;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+ // Multiple input types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = VT.getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if (!ValidTypes)
+ return SDValue();
+
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SDLoc(N), SourceType);
+
+ unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.isUndef()) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.isUndef())
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorklist(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getBitcast(VT, BV);
+}
+
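+// Simplify (build_vec (sint_to_fp x), (sint_to_fp y), ...) to
+// (sint_to_fp (build_vec x, y, ...)), and likewise for uint_to_fp, when all
+// defined operands use the same conversion from the same source integer type.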
+SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ unsigned NumInScalars = N->getNumOperands();
+ SDLoc dl(N);
+
+ EVT SrcVT = MVT::Other;
+ unsigned Opcode = ISD::DELETED_NODE;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opc = In.getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ // If all scalar values are floats and converted from integers.
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+ }
+
+ if (Opc != Opcode)
+ return SDValue();
+
+ EVT InVT = In.getOperand(0).getValueType();
+
+ // If the scalar source values do not all have the same type, bail out. This
+ // restriction is chosen to simplify handling BUILD_VECTORs of integer types.
+ if (SrcVT == MVT::Other)
+ SrcVT = InVT;
+ if (SrcVT != InVT)
+ return SDValue();
+ NumDefs++;
+ }
+
+ // If the vector has just one element defined, it's not worth folding it
+ // into a vectorized one.
+ if (NumDefs < 2)
+ return SDValue();
+
+ assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
+ && "Should only handle conversion from integer to float.");
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
+ return SDValue();
+
+ // Just because the floating-point vector type is legal does not necessarily
+ // mean that the corresponding integer vector type is.
+ if (!isTypeLegal(NVT))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+
+ if (In.isUndef())
+ Opnds.push_back(DAG.getUNDEF(SrcVT));
+ else
+ Opnds.push_back(In.getOperand(0));
+ }
+ SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
+ AddToWorklist(BV.getNode());
+
+ return DAG.getNode(Opcode, dl, VT, BV);
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ unsigned NumInScalars = N->getNumOperands();
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
+ return V;
+
+ // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+ // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+ // at most two distinct vectors, turn this into a shuffle node.
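+  // For instance (illustrative, assuming the resulting shuffle is legal):
+  //   (v4i32 build_vector (extract_vector_elt %A, 0),
+  //                       (extract_vector_elt %A, 2),
+  //                       (extract_vector_elt %B, 1),
+  //                       (extract_vector_elt %B, 3))
+  //   --> (vector_shuffle %A, %B, <0, 2, 5, 7>)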
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
+ SDValue VecIn1, VecIn2;
+ bool UsesZeroVector = false;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue Op = N->getOperand(i);
+ // Ignore undef inputs.
+ if (Op.isUndef()) continue;
+
+ // See if we can combine this build_vector into a blend with a zero vector.
+ if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
+ UsesZeroVector = true;
+ continue;
+ }
+
+    // If this input is something other than an EXTRACT_VECTOR_ELT with a
+ // constant index, bail out.
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op.getOperand(1))) {
+ VecIn1 = VecIn2 = SDValue(nullptr, 0);
+ break;
+ }
+
+ // We allow up to two distinct input vectors.
+ SDValue ExtractedFromVec = Op.getOperand(0);
+ if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+ continue;
+
+ if (!VecIn1.getNode()) {
+ VecIn1 = ExtractedFromVec;
+ } else if (!VecIn2.getNode() && !UsesZeroVector) {
+ VecIn2 = ExtractedFromVec;
+ } else {
+ // Too many inputs.
+ VecIn1 = VecIn2 = SDValue(nullptr, 0);
+ break;
+ }
+ }
+
+ // If everything is good, we can make a shuffle operation.
+ if (VecIn1.getNode()) {
+ unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ unsigned Opcode = N->getOperand(i).getOpcode();
+ if (Opcode == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Operands can also be zero.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
+ assert(UsesZeroVector &&
+ (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
+ "Unexpected node found!");
+ Mask.push_back(NumInScalars+i);
+ continue;
+ }
+
+ // If extracting from the first vector, just use the index directly.
+ SDValue Extract = N->getOperand(i);
+ SDValue ExtVal = Extract.getOperand(1);
+ unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+ if (Extract.getOperand(0) == VecIn1) {
+ Mask.push_back(ExtIndex);
+ continue;
+ }
+
+      // Otherwise, offset the index by the number of elements in VecIn1.
+ Mask.push_back(InNumElements + ExtIndex);
+ }
+
+ // Avoid introducing illegal shuffles with zero.
+ if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ return SDValue();
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if ((VT != VecIn1.getValueType())) {
+      // If the input vector type has a different element type than the output
+      // vector type, bail out.
+ EVT VTElemType = VT.getVectorElementType();
+ if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
+ (VecIn2.getNode() &&
+ (VecIn2.getValueType().getVectorElementType() != VTElemType)))
+ return SDValue();
+
+ // If the input vector is too small, widen it.
+ // We only support widening of vectors which are half the size of the
+ // output registers. For example XMM->YMM widening on X86 with AVX.
+ EVT VecInT = VecIn1.getValueType();
+ if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
+ // If we only have one small input, widen it by adding undef values.
+ if (!VecIn2.getNode())
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
+ DAG.getUNDEF(VecIn1.getValueType()));
+ else if (VecIn1.getValueType() == VecIn2.getValueType()) {
+ // If we have two small inputs of the same type, try to concat them.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
+ VecIn2 = SDValue(nullptr, 0);
+ } else
+ return SDValue();
+ } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
+ // If the input vector is too large, try to split it.
+ // We don't support having two input vectors that are too large.
+        // If the zero vector was used, we cannot split the vector,
+ // since we'd need 3 inputs.
+ if (UsesZeroVector || VecIn2.getNode())
+ return SDValue();
+
+ if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
+ return SDValue();
+
+        // Try to replace VecIn1 with two extract_subvectors.
+        // No need to update the mask; it is still correct.
+ VecIn2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecIn1 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ } else
+ return SDValue();
+ }
+
+ if (UsesZeroVector)
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
+ DAG.getConstantFP(0.0, dl, VT);
+ else
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same
+ // type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ SDValue Ops[2];
+ Ops[0] = VecIn1;
+ Ops[1] = VecIn2;
+ return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
+ }
+
+ return SDValue();
+}
+
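+// Rough illustration of the helper below, assuming v2i32 is not a legal type
+// on the target but v2f64 is:
+//   (v4i32 concat_vectors (v2i32 bitcast f64 %x), (v2i32 bitcast f64 %y))
+//   --> (v4i32 bitcast (v2f64 build_vector %x, %y))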
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+    // If it's neither, bail out; it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.isUndef())
+ Op = ScalarUndef;
+ else
+ Op = DAG.getBitcast(SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
+}
+
+// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
+// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
+// most two distinct vectors the same size as the result, attempt to turn this
+// into a legal shuffle.
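+// For example (illustrative, assuming the resulting mask is legal), with
+// v2f32 operands extracted from v4f32 inputs:
+//   (v4f32 concat_vectors (extract_subvector %A, 0),
+//                         (extract_subvector %B, 2))
+//   --> (vector_shuffle %A, %B, <0, 1, 6, 7>)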
+static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ EVT OpVT = N->getOperand(0).getValueType();
+ int NumElts = VT.getVectorNumElements();
+ int NumOpElts = OpVT.getVectorNumElements();
+
+ SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
+ SmallVector<int, 8> Mask;
+
+ for (SDValue Op : N->ops()) {
+ // Peek through any bitcast.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (Op.isUndef()) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // What vector are we extracting the subvector from and at what index?
+ SDValue ExtVec = Op.getOperand(0);
+
+ // We want the EVT of the original extraction to correctly scale the
+ // extraction index.
+ EVT ExtVT = ExtVec.getValueType();
+
+ // Peek through any bitcast.
+ while (ExtVec.getOpcode() == ISD::BITCAST)
+ ExtVec = ExtVec.getOperand(0);
+
+ // UNDEF nodes convert to UNDEF shuffle mask values.
+ if (ExtVec.isUndef()) {
+ Mask.append((unsigned)NumOpElts, -1);
+ continue;
+ }
+
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ // Ensure that we are extracting a subvector from a vector the same
+ // size as the result.
+ if (ExtVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+
+ // Scale the subvector index to account for any bitcast.
+ int NumExtElts = ExtVT.getVectorNumElements();
+ if (0 == (NumExtElts % NumElts))
+ ExtIdx /= (NumExtElts / NumElts);
+ else if (0 == (NumElts % NumExtElts))
+ ExtIdx *= (NumElts / NumExtElts);
+ else
+ return SDValue();
+
+ // At most we can reference 2 inputs in the final shuffle.
+ if (SV0.isUndef() || SV0 == ExtVec) {
+ SV0 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx);
+ } else if (SV1.isUndef() || SV1 == ExtVec) {
+ SV1 = ExtVec;
+ for (int i = 0; i != NumOpElts; ++i)
+ Mask.push_back(i + ExtIdx + NumElts);
+ } else {
+ return SDValue();
+ }
+ }
+
+ if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask);
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+ // If we only have one input vector, we don't need to do any concatenation.
+ if (N->getNumOperands() == 1)
+ return N->getOperand(0);
+
+ // Check if all of the operands are undefs.
+ EVT VT = N->getValueType(0);
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.isUndef();
+ })) {
+ SDValue In = N->getOperand(0);
+ assert(In.getValueType().isVector() && "Must concat vectors");
+
+    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
+ if (In->getOpcode() == ISD::BITCAST &&
+ !In->getOperand(0)->getValueType(0).isVector()) {
+ SDValue Scalar = In->getOperand(0);
+
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
+ EVT SclTy = Scalar->getValueType(0);
+
+ if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
+ VT.getSizeInBits() / SclTy.getSizeInBits());
+ if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
+ return SDValue();
+
+ SDLoc dl = SDLoc(N);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+
+ // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
+ // We have already tested above for an UNDEF only concatenation.
+ // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+ // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+ auto IsBuildVectorOrUndef = [](const SDValue &Op) {
+ return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
+ };
+ if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
+ SmallVector<SDValue, 8> Opnds;
+ EVT SVT = VT.getScalarType();
+
+ EVT MinVT = SVT;
+ if (!SVT.isFloatingPoint()) {
+      // If the BUILD_VECTOR nodes are built from integers, they may have
+      // different operand types. Get the smallest type and truncate all
+      // operands to it.
+ bool FoundMinVT = false;
+ for (const SDValue &Op : N->ops())
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ EVT OpSVT = Op.getOperand(0)->getValueType(0);
+ MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
+ FoundMinVT = true;
+ }
+ assert(FoundMinVT && "Concat vector type mismatch");
+ }
+
+ for (const SDValue &Op : N->ops()) {
+ EVT OpVT = Op.getValueType();
+ unsigned NumElts = OpVT.getVectorNumElements();
+
+ if (ISD::UNDEF == Op.getOpcode())
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
+
+ if (ISD::BUILD_VECTOR == Op.getOpcode()) {
+ if (SVT.isFloatingPoint()) {
+ assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
+ } else {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Opnds.push_back(
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
+ }
+ }
+ }
+
+ assert(VT.getVectorNumElements() == Opnds.size() &&
+ "Concat vector type mismatch");
+ return DAG.getBuildVector(VT, SDLoc(N), Opnds);
+ }
+
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
+ return V;
+
+  // Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
+  // nodes often generate nop CONCAT_VECTORS nodes. Scan the CONCAT_VECTORS
+  // operands and look for a concatenation that simply re-assembles a single
+  // incoming vector in its original element order.
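+  // e.g. (v8i32 concat_vectors (extract_subvector %V, 0),
+  //                            (extract_subvector %V, 4))
+  // merely reassembles a v8i32 %V and can be replaced by %V itself.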
+ SDValue SingleSource = SDValue();
+ unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.isUndef())
+ continue;
+
+ // Check if this is the identity extract:
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // Find the single incoming vector for the extract_subvector.
+ if (SingleSource.getNode()) {
+ if (Op.getOperand(0) != SingleSource)
+ return SDValue();
+ } else {
+ SingleSource = Op.getOperand(0);
+
+      // Check that the source type is the same as the type of the result.
+      // If not, this concat may extend the vector, so we cannot
+      // optimize it away.
+ if (SingleSource.getValueType() != N->getValueType(0))
+ return SDValue();
+ }
+
+ unsigned IdentityIndex = i * PartNumElem;
+ ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // The extract index must be constant.
+ if (!CS)
+ return SDValue();
+
+ // Check that we are reading from the identity index.
+ if (CS->getZExtValue() != IdentityIndex)
+ return SDValue();
+ }
+
+ if (SingleSource.getNode())
+ return SingleSource;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+ EVT NVT = N->getValueType(0);
+ SDValue V = N->getOperand(0);
+
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+    // Only operand 0 is checked, as CONCAT_VECTORS requires all inputs to
+    // have the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = N->getConstantOperandVal(1);
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
+ // Skip bitcasting
+ if (V->getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+
+ if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
+ SDLoc dl(N);
+    // Handle only the simple case where the vector being inserted and the
+    // vector being extracted have the same type and are half the size of the
+    // larger vector.
+ EVT BigVT = V->getOperand(0).getValueType();
+ EVT SmallVT = V->getOperand(1).getValueType();
+ if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ return SDValue();
+
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal or bit offsets are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ return DAG.getBitcast(NVT, V->getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
+ N->getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
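+// Rough sketch of the helper below: given the set of elements of V that a
+// shuffle actually uses, walk CONCAT_VECTORS / INSERT_SUBVECTOR chains and
+// replace pieces whose elements are never referenced with UNDEF. For example,
+// if only elements 0-3 of (v8i32 concat_vectors %A, %B) are used, the node
+// is rebuilt as (concat_vectors %A, undef).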
+static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
+ SDValue V, SelectionDAG &DAG) {
+ SDLoc DL(V);
+ EVT VT = V.getValueType();
+
+ switch (V.getOpcode()) {
+ default:
+ return V;
+
+ case ISD::CONCAT_VECTORS: {
+ EVT OpVT = V->getOperand(0).getValueType();
+ int OpSize = OpVT.getVectorNumElements();
+ SmallBitVector OpUsedElements(OpSize, false);
+ bool FoundSimplification = false;
+ SmallVector<SDValue, 4> NewOps;
+ NewOps.reserve(V->getNumOperands());
+ for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
+ SDValue Op = V->getOperand(i);
+ bool OpUsed = false;
+ for (int j = 0; j < OpSize; ++j)
+ if (UsedElements[i * OpSize + j]) {
+ OpUsedElements[j] = true;
+ OpUsed = true;
+ }
+ NewOps.push_back(
+ OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
+ : DAG.getUNDEF(OpVT));
+ FoundSimplification |= Op == NewOps.back();
+ OpUsedElements.reset();
+ }
+ if (FoundSimplification)
+ V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
+ return V;
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue BaseV = V->getOperand(0);
+ SDValue SubV = V->getOperand(1);
+ auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ if (!IdxN)
+ return V;
+
+ int SubSize = SubV.getValueType().getVectorNumElements();
+ int Idx = IdxN->getZExtValue();
+ bool SubVectorUsed = false;
+ SmallBitVector SubUsedElements(SubSize, false);
+ for (int i = 0; i < SubSize; ++i)
+ if (UsedElements[i + Idx]) {
+ SubVectorUsed = true;
+ SubUsedElements[i] = true;
+ UsedElements[i + Idx] = false;
+ }
+
+ // Now recurse on both the base and sub vectors.
+ SDValue SimplifiedSubV =
+ SubVectorUsed
+ ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
+ : DAG.getUNDEF(SubV.getValueType());
+ SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
+ if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
+ V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
+ return V;
+ }
+ }
+}
+
+static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
+ SDValue N1, SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ int NumElts = VT.getVectorNumElements();
+ SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
+ for (int M : SVN->getMask())
+ if (M >= 0 && M < NumElts)
+ N0UsedElements[M] = true;
+ else if (M >= NumElts)
+ N1UsedElements[M - NumElts] = true;
+
+ SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
+ SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
+ if (S0 == N0 && S1 == N1)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
+}
+
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
+// or turn a shuffle of a single concat into a simpler shuffle then a concat.
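+// For example (illustrative), with v2i32 subvectors:
+//   (vector_shuffle (concat_vectors %A, %B), (concat_vectors %C, %D),
+//                   <2, 3, 4, 5>)
+//   --> (concat_vectors %B, %C)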
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SmallVector<SDValue, 4> Ops;
+ EVT ConcatVT = N0.getOperand(0).getValueType();
+ unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+ unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+  // Special case: shuffle(concat(A,B)) can be more efficiently represented
+  // as concat(shuffle(A,B),UNDEF) if the shuffle leaves all of the high-half
+  // result elements undef.
+ if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
+ std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
+ SVN->getMask().end(), [](int i) { return i == -1; })) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
+ makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
+ N1 = DAG.getUNDEF(ConcatVT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
+ }
+
+ // Look at every vector that's inserted. We're looking for exact
+ // subvector-sized copies from a concatenated vector
+ for (unsigned I = 0; I != NumConcats; ++I) {
+ // Make sure we're dealing with a copy.
+ unsigned Begin = I * NumElemsPerConcat;
+ bool AllUndef = true, NoUndef = true;
+ for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
+ if (SVN->getMaskElt(J) >= 0)
+ AllUndef = false;
+ else
+ NoUndef = false;
+ }
+
+ if (NoUndef) {
+ if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+ return SDValue();
+
+ for (unsigned J = 1; J != NumElemsPerConcat; ++J)
+ if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+ return SDValue();
+
+ unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
+ if (FirstElt < N0.getNumOperands())
+ Ops.push_back(N0.getOperand(FirstElt));
+ else
+ Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
+
+ } else if (AllUndef) {
+ Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
+ } else { // Mixed with general masks and undefs, can't do optimization.
+ return SDValue();
+ }
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.isUndef() && N1.isUndef())
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.isUndef())
+ return DAG.getCommutedVectorShuffle(*SVN);
+
+ // Remove references to rhs if it is undef
+ if (N1.isUndef()) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
+ }
+
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector.
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+ SDNode *V = N0.getNode();
+
+ // If this is a bit convert that changes the element type of the vector but
+ // not the number of vector elements, look through it. Be careful not to
+    // look through conversions that change things like v4f32 to v2f64.
+ if (V->getOpcode() == ISD::BITCAST) {
+ SDValue ConvInput = V->getOperand(0);
+ if (ConvInput.getValueType().isVector() &&
+ ConvInput.getValueType().getVectorNumElements() == NumElts)
+ V = ConvInput.getNode();
+ }
+
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!V->getOperand(i).isUndef()) {
+ Base = V->getOperand(i);
+ break;
+ }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
+ }
+ }
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
+
+ // Canonicalize any other splat as a build_vector.
+ const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+ SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (V->getValueType(0) != VT)
+ NewBV = DAG.getBitcast(VT, NewBV);
+ return NewBV;
+ }
+ }
+
+ // There are various patterns used to build up a vector from smaller vectors,
+ // subvectors, or elements. Scan chains of these and replace unused insertions
+ // or components with undef.
+ if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
+ return S;
+
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ Level < AfterLegalizeVectorOps &&
+ (N1.isUndef() ||
+ (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+ if (SDValue V = partitionShuffleOfConcats(N, DAG))
+ return V;
+ }
+
+ // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+ // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ SmallVector<SDValue, 8> Ops;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M % NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ break;
+ }
+ }
+ Ops.push_back(Op);
+ }
+ if (Ops.size() == VT.getVectorNumElements()) {
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
+ return DAG.getBuildVector(VT, SDLoc(N), Ops);
+ }
+ }
+
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.isUndef() && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
+
+ // Peek through the bitcast only if there is one user.
+ SDValue BC0 = N0;
+ while (BC0.getOpcode() == ISD::BITCAST) {
+ if (!BC0.hasOneUse())
+ break;
+ BC0 = BC0.getOperand(0);
+ }
+
+ auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+ if (Scale == 1)
+ return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+ SmallVector<int, 8> NewMask;
+ for (int M : Mask)
+ for (int s = 0; s != Scale; ++s)
+ NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+ return NewMask;
+ };
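+    // ScaleShuffleMask rewrites a mask in terms of a narrower scalar type;
+    // e.g. scaling <1, -1> by 2 yields <2, 3, -1, -1>, since each wide
+    // element covers Scale consecutive narrow elements.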
+
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask =
+ ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+ SmallVector<int, 8> OuterMask =
+ ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getBitcast(ScaleVT, SV0);
+ SV1 = DAG.getBitcast(ScaleVT, SV1);
+ return DAG.getBitcast(
+ VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ TLI.isTypeLegal(VT)) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+      // Commute the operands of this shuffle so that the next rule
+ // will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ SDValue CurrentVec;
+ if (Idx < (int)NumElts) {
+        // This shuffle index refers to the inner shuffle N0. Look up the inner
+ // shuffle mask to identify which vector is actually referenced.
+ Idx = OtherSV->getMaskElt(Idx);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
+ : OtherSV->getOperand(1);
+ } else {
+ // This shuffle index references an element within N1.
+ CurrentVec = N1;
+ }
+
+ // Simple case where 'CurrentVec' is UNDEF.
+ if (CurrentVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Canonicalize the shuffle index. We don't know yet if CurrentVec
+ // will be the first or second operand of the combined shuffle.
+ Idx = Idx % NumElts;
+ if (!SV0.getNode() || SV0 == CurrentVec) {
+ // Ok. CurrentVec is the left hand side.
+ // Update the mask accordingly.
+ SV0 = CurrentVec;
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ if (SV1.getNode() && SV1 != CurrentVec)
+ return SDValue();
+
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
+ }
+
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
+
+ // Avoid introducing shuffles with illegal mask.
+ if (!TLI.isShuffleMaskLegal(Mask, VT)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+
+ if (!TLI.isShuffleMaskLegal(Mask, VT))
+ return SDValue();
+
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ std::swap(SV0, SV1);
+ }
+
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ SDValue InVal = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE.
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue InVec = InVal->getOperand(0);
+ SDValue EltNo = InVal->getOperand(1);
+
+ // FIXME: We could support implicit truncation if the shuffle can be
+ // scaled to a smaller vector scalar type.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
+ if (C0 && VT == InVec.getValueType() &&
+ VT.getScalarType() == InVal.getValueType()) {
+ SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ int Elt = C0->getZExtValue();
+ NewMask[0] = Elt;
+
+ if (TLI.isShuffleMaskLegal(NewMask, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
+ NewMask);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+
+ if (N0.getValueType() != N1.getValueType())
+ return SDValue();
+
+ // If the input vector is a concatenation, and the insert replaces
+ // one of the halves, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
+ N2.getOpcode() == ISD::Constant) {
+ APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+ EVT VT = N->getValueType(0);
+
+ // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+ // (concat_vectors Z, Y)
+ if (InsIdx == 0)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
+ N0.getOperand(1));
+
+ // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+ // (concat_vectors X, Z)
+ if (InsIdx == VT.getVectorNumElements() / 2)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
+ N1);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
+ if (N0->getOpcode() == ISD::AND) {
+ ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
+ if (AndConst && AndConst->getAPIntValue() == 0xffff) {
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
+ N0.getOperand(0));
+ }
+ }
+
+ return SDValue();
+}
+
+/// Returns a vector_shuffle if it is able to transform an AND into a
+/// vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+/// vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc dl(N);
+
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
+
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+
+ if (RHS.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ EVT RVT = RHS.getValueType();
+ unsigned NumElts = RHS.getNumOperands();
+
+ // Attempt to create a valid clear mask, splitting the mask into
+ // sub elements and checking to see if each is
+ // all zeros or all ones - suitable for shuffle masking.
+ auto BuildClearMask = [&](int Split) {
+ int NumSubElts = NumElts * Split;
+ int NumSubBits = RVT.getScalarSizeInBits() / Split;
+
+ SmallVector<int, 8> Indices;
+ for (int i = 0; i != NumSubElts; ++i) {
+ int EltIdx = i / Split;
+ int SubIdx = i % Split;
+ SDValue Elt = RHS.getOperand(EltIdx);
+ if (Elt.isUndef()) {
+ Indices.push_back(-1);
+ continue;
+ }
+
+ APInt Bits;
+ if (isa<ConstantSDNode>(Elt))
+ Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
+ else if (isa<ConstantFPSDNode>(Elt))
+ Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ // Extract the sub element from the constant bit mask.
+ if (DAG.getDataLayout().isBigEndian()) {
+ Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
+ } else {
+ Bits = Bits.lshr(SubIdx * NumSubBits);
+ }
+
+ if (Split > 1)
+ Bits = Bits.trunc(NumSubBits);
+
+ if (Bits.isAllOnesValue())
+ Indices.push_back(i);
+ else if (Bits == 0)
+ Indices.push_back(i + NumSubElts);
+ else
+ return SDValue();
+ }
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
+ EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
+ if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
+ return SDValue();
+
+ SDValue Zero = DAG.getConstant(0, dl, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ DAG.getBitcast(ClearVT, LHS),
+ Zero, Indices));
+ };
+
+ // Determine maximum split level (byte level masking).
+ int MaxSplit = 1;
+ if (RVT.getScalarSizeInBits() % 8 == 0)
+ MaxSplit = RVT.getScalarSizeInBits() / 8;
+
+ for (int Split = 1; Split <= MaxSplit; ++Split)
+ if (RVT.getScalarSizeInBits() % Split == 0)
+ if (SDValue S = BuildClearMask(Split))
+ return S;
+
+ return SDValue();
+}
+
+/// Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Ops[] = {LHS, RHS};
+
+ // See if we can constant fold the vector operation.
+ if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
+ N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ return Fold;
+
+ // Try to convert a constant mask AND into a shuffle clear mask.
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
+
+ // Type legalization might introduce new shuffles in the DAG.
+ // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+ // -> (shuffle (VBinOp (A, B)), Undef, Mask).
+ if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+ isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getOperand(1).isUndef() &&
+ RHS.getOperand(1).isUndef()) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+ if (SVN0->getMask().equals(SVN1->getMask())) {
+ EVT VT = N->getValueType(0);
+ SDValue UndefVector = LHS.getOperand(1);
+ SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ LHS.getOperand(0), RHS.getOperand(0),
+ N->getFlags());
+ AddUsersToWorklist(N);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+ SVN0->getMask());
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2) {
+  assert(N0.getOpcode() == ISD::SETCC &&
+         "First argument must be a SetCC node!");
+
+ SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+ // If we got a simplified select_cc node back from SimplifySelectCC, then
+ // break it down into a new SETCC node, and a new SELECT node, and then return
+ // the SELECT node, since we were called with a SELECT node.
+ if (SCC.getNode()) {
+ // Check to see if we got a select_cc back (to turn into setcc/select).
+ // Otherwise, just return whatever node we got back, like fabs.
+ if (SCC.getOpcode() == ISD::SELECT_CC) {
+ SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
+ N0.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1),
+ SCC.getOperand(4));
+ AddToWorklist(SETCC.getNode());
+ return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
+ }
+
+ return SCC;
+ }
+ return SDValue();
+}
+
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select. Callers of this
+/// should assume that TheSelect is deleted if this returns true. As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+ SDValue RHS) {
+
+ // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *Zero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (Zero && Zero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+ // If this is a select from two identical things, try to pull the operation
+ // through the select.
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
+ // Do not let this transformation reduce the number of volatile loads.
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
+ // FIXME: this discards src value information. This is
+ // over-conservative. It would be beneficial to be able to remember
+ // both potential memory locations. Since we are discarding
+ // src value info, don't do the transformation if the memory
+ // locations are not in the default address space.
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0 ||
+ !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
+ LLD->getBasePtr().getValueType()))
+ return false;
+
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
+ Addr = DAG.getSelect(SDLoc(TheSelect),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+ if ((LLD->hasAnyUseOfValue(1) &&
+ (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
+
+ Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+    // It is safe to replace the two loads if they have different alignments,
+    // but the new load must use the minimum (most restrictive) alignment of
+    // the two inputs.
+ unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
+ MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
+ if (!RLD->isInvariant())
+ MMOFlags &= ~MachineMemOperand::MOInvariant;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
+ LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
+ MMOFlags);
+ } else {
+ // FIXME: Discards pointer and AA info.
+ Load = DAG.getExtLoad(
+ LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
+ : LLD->getExtensionType(),
+ SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
+ MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
+ }
+
+ return false;
+}
+
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC,
+ bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
+ EVT VT = N2.getValueType();
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, DL, false);
+ if (SCC.getNode()) AddToWorklist(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !SCCC->isNullValue() ? N2 : N3;
+ }
+
+ // Check to see if we can simplify the select into an fabs node
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+ // Allow either -0.0 or 0.0
+ if (CFP->isZero()) {
+ // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+ if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+ N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+ N2 == N3.getOperand(0))
+ return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+ // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+ if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+ N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+ N2.getOperand(0) == N3)
+ return DAG.getNode(ISD::FABS, DL, VT, N3);
+ }
+ }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+  // in it. This is a win when the constant is not otherwise available because
+  // it replaces two constant pool loads with one. We only do this if the FP
+  // type is known to be legal, because if it isn't, then we are before
+  // legalize types and we want the other legalization to happen first (e.g.
+  // to avoid messing with soft float), and only if the ConstantFP is not
+  // legal, because if it is legal, we may not need to store the FP constant
+  // in a constant pool at all.
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+ TargetLowering::Legal &&
+ !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
+ !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ Type *FPTy = Elts[0]->getType();
+ const DataLayout &TD = DAG.getDataLayout();
+
+ // Create a ConstantArray of the two constants.
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
+ SDValue CPIdx =
+ DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
+
+ SDValue Cond = DAG.getSetCC(DL,
+ getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ AddToWorklist(Cond.getNode());
+ SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
+ Cond, One, Zero);
+ AddToWorklist(CstOffset.getNode());
+ CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
+ CstOffset);
+ AddToWorklist(CPIdx.getNode());
+ return DAG.getLoad(
+ TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ }
+ }
+
+  // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
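+  // e.g. for i32 values the generic form is (and (sra X, 31), A): the
+  // arithmetic shift is all-ones exactly when X is negative, so the AND
+  // yields A for negative X and 0 otherwise, with no branch or select.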
+ if (isNullConstant(N3) && CC == ISD::SETLT &&
+ (isNullConstant(N1) || // (a < 0) ? b : 0
+ (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (XType.bitsGE(AType)) {
+      // (and (sra X, size(X)-1), A) -> (and (srl X, C2), A) iff A is a
+      // single-bit constant.
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+ unsigned ShCtV = N2C->getAPIntValue().logBase2();
+ ShCtV = XType.getSizeInBits() - ShCtV - 1;
+ SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
+ getShiftAmountTy(N0.getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
+ XType, N0, ShCt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
+ XType, N0,
+ DAG.getConstant(XType.getSizeInBits() - 1,
+ SDLoc(N0),
+ getShiftAmountTy(N0.getValueType())));
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+ }
+
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)), A)
+  // where y has a single bit set.
+  // A plaintext description would be: we can turn the SELECT_CC into an AND
+  // when the condition can be materialized as an all-ones register. Any
+  // single bit-test can be materialized as an all-ones register with
+  // shift-left and shift-right-arith.
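+  // As an illustration, for i32 with y == 0x10:
+  //   (select_cc seteq (and x, 0x10), 0, 0, A)
+  //   --> (and (sra (shl x, 27), 31), A)
+  // since bit 4 of x is moved to the sign bit and then replicated across the
+  // whole register.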
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ const APInt &AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
+ // fold select C, 16, 0 -> shl C, 4
+ if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
+
+ // If the caller doesn't want us to simplify this into a zext of a compare,
+ // don't do it.
+ if (NotExtCompare && N2C->isOne())
+ return SDValue();
+
+ // Get a SetCC of the condition
+ // NOTE: Don't create a SETCC if it's not legal on this target.
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
+ SDValue Temp, SCC;
+ // cast from setcc result type to select result type
+ if (LegalTypes) {
+ SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ if (N2.getValueType().bitsLT(SCC.getValueType()))
+ Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
+ N2.getValueType());
+ else
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
+ N2.getValueType(), SCC);
+ } else {
+ SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
+ Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
+ N2.getValueType(), SCC);
+ }
+
+ AddToWorklist(SCC.getNode());
+ AddToWorklist(Temp.getNode());
+
+ if (N2C->isOne())
+ return Temp;
+
+ // shl setcc result by log2 n2c
+ return DAG.getNode(
+ ISD::SHL, DL, N2.getValueType(), Temp,
+ DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
+ getShiftAmountTy(Temp.getValueType())));
+ }
+ }
+
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
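+  // Illustrative example (assuming i8): for X = -5, Y = sra(X, 7) = -1,
+  // add(X, Y) = -6, and xor(-6, -1) = 5 = |X|; for non-negative X, Y = 0 and
+  // the add/xor leave X unchanged.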
+ if (N1C) {
+ ConstantSDNode *SubC = nullptr;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+ EVT XType = N0.getValueType();
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDLoc DL(N0);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
+ getShiftAmountTy(N0.getValueType())));
+ SDValue Add = DAG.getNode(ISD::ADD, DL,
+ XType, N0, Shift);
+ AddToWorklist(Shift.getNode());
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+ }
+ }
+
+ // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
+ // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
+ // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
+ // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
+ // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
+ if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ SDValue ValueOnZero = N2;
+ SDValue Count = N3;
+    // If the condition is NE instead of EQ, swap the operands.
+ if (CC == ISD::SETNE)
+ std::swap(ValueOnZero, Count);
+    // Check if the value on zero is a constant equal to the type's bit width.
+ if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
+ if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
+ // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
+ // legal, combine to just cttz.
+ if ((Count.getOpcode() == ISD::CTTZ ||
+ Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
+ return DAG.getNode(ISD::CTTZ, DL, VT, N0);
+ // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
+ // legal, combine to just ctlz.
+ if ((Count.getOpcode() == ISD::CTLZ ||
+ Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
+ N0 == Count.getOperand(0) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
+ return DAG.getNode(ISD::CTLZ, DL, VT, N0);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+/// This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ bool foldBooleans) {
+ TargetLowering::DAGCombinerInfo
+ DagCombineInfo(DAG, Level, false, this);
+ return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
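+/// For example (illustrative, assuming i32): a signed divide by 3 becomes a
+/// MULHS by 0x55555556 followed by adding the result's own logical shift
+/// right by 31 to fix up negative quotients.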
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isNullValue())
+ return SDValue();
+
+ std::vector<SDNode*> Built;
+ SDValue S =
+ TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
+/// DAG expression that will generate the same value by right shifting.
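+/// For example (illustrative, assuming i32): x sdiv 4 becomes
+/// (x + ((x >> 31) >>u 30)) >> 2, i.e. add 3 before the arithmetic shift
+/// when x is negative so the result rounds toward zero.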
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isNullValue())
+ return SDValue();
+
+ std::vector<SDNode *> Built;
+ SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+}
+
+/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
+/// expression that will generate the same value by multiplying by a magic
+/// number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
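+/// For example (illustrative, assuming i32): an unsigned divide by 3 becomes
+/// a MULHU by 0xAAAAAAAB followed by a logical shift right by 1.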
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  // When optimizing for minimum size, we don't want to expand a div to a mul
+  // and a shift.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isNullValue())
+ return SDValue();
+
+ std::vector<SDNode*> Built;
+ SDValue S =
+ TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+}
+
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // Expose the DAG combiner to the target combiner implementations.
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+
+ unsigned Iterations = 0;
+ if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
+ if (Iterations) {
+      // Newton's iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
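+      // Illustrative example: for A = 3 with an initial estimate of 0.3, one
+      // step gives 0.3 * (2 - 3 * 0.3) = 0.33 and the next gives 0.3333,
+      // converging quadratically towards 1/3.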
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+
+ AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
+ AddToWorklist(Est.getNode());
+ }
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+/// Newton's iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
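+/// For example (illustrative): with A = 4 and an initial estimate of 0.4, one
+/// step gives 0.4 * (1.5 - 2 * 0.16) = 0.472 and the next roughly 0.4977,
+/// approaching 1/sqrt(4) = 0.5.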
+SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
+
+ // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
+ AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
+ AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
+ AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
+ AddToWorklist(Est.getNode());
+ }
+
+ // If non-reciprocal square root is requested, multiply the result by Arg.
+ if (!Reciprocal) {
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
+ AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+}
+
+/// Newton's iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+/// =>
+/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
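+/// Note that this is algebraically the same update as the one-constant form:
+/// (-0.5 * X) * (A * X * X - 3.0) = X * (1.5 - A * X * X / 2); only the
+/// constants and the association of the operations differ.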
+SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
+ unsigned Iterations,
+ SDNodeFlags *Flags, bool Reciprocal) {
+ EVT VT = Arg.getValueType();
+ SDLoc DL(Arg);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
+
+ // This routine must enter the loop below to work correctly
+ // when (Reciprocal == false).
+ assert(Iterations > 0);
+
+ // Newton iterations for reciprocal square root:
+ // E = (E * -0.5) * ((A * E) * E + -3.0)
+ for (unsigned i = 0; i < Iterations; ++i) {
+ SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
+ AddToWorklist(AE.getNode());
+
+ SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
+ AddToWorklist(AEE.getNode());
+
+ SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
+ AddToWorklist(RHS.getNode());
+
+ // When calculating a square root at the last iteration build:
+ // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
+ // (notice a common subexpression)
+ SDValue LHS;
+ if (Reciprocal || (i + 1) < Iterations) {
+ // RSQRT: LHS = (E * -0.5)
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
+ } else {
+ // SQRT: LHS = (A * E) * -0.5
+ LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
+ }
+ AddToWorklist(LHS.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
+ AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+}
+
+/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
+/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
+/// Op can be zero.
+SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
+ bool Reciprocal) {
+ if (Level >= AfterLegalizeDAG)
+ return SDValue();
+
+ // Expose the DAG combiner to the target combiner implementations.
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ unsigned Iterations = 0;
+ bool UseOneConstNR = false;
+ if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ AddToWorklist(Est.getNode());
+ if (Iterations) {
+ Est = UseOneConstNR
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ }
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+ return buildSqrtEstimateImpl(Op, Flags, true);
+}
+
+SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
+ SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
+ if (!Est)
+ return SDValue();
+
+ // Unfortunately, Est is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ EVT VT = Est.getValueType();
+ SDLoc DL(Op);
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = getSetCCResultType(VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+
+ Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
+ Zero, Est);
+ AddToWorklist(Est.getNode());
+ return Est;
+}
+
+/// Return true if base is a frame index, which is known not to alias with
+/// anything but itself. Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+ const GlobalValue *&GV, const void *&CV) {
+ // Assume it is a primitive operation.
+ Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
+
+  // If it's adding a simple constant, then integrate the offset.
+ if (Base.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+ Base = Base.getOperand(0);
+ Offset += C->getZExtValue();
+ }
+ }
+
+ // Return the underlying GlobalValue, and update the Offset. Return false
+ // for GlobalAddressSDNode since the same GlobalAddress may be represented
+ // by multiple nodes with different offsets.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+ GV = G->getGlobal();
+ Offset += G->getOffset();
+ return false;
+ }
+
+ // Return the underlying Constant value, and update the Offset. Return false
+ // for ConstantSDNodes since the same constant pool entry may be represented
+ // by multiple nodes with different offsets.
+ if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+ CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
+ : (const void *)C->getConstVal();
+ Offset += C->getOffset();
+ return false;
+ }
+  // Otherwise, only a frame index can't alias with anything but itself.
+ return isa<FrameIndexSDNode>(Base);
+}
+
+/// Return true if there is any possibility that the two addresses overlap.
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
+ // If they are the same then they must be aliases.
+ if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (Op0->isVolatile() && Op1->isVolatile()) return true;
+
+  // If one operation reads from invariant memory and the other may store, they
+  // cannot alias. This should really check the equivalent of mayWrite, but
+  // that only matters for memory nodes other than load/store.
+ if (Op0->isInvariant() && Op1->writeMem())
+ return false;
+
+ if (Op1->isInvariant() && Op0->writeMem())
+ return false;
+
+ // Gather base node and offset information.
+ SDValue Base1, Base2;
+ int64_t Offset1, Offset2;
+ const GlobalValue *GV1, *GV2;
+ const void *CV1, *CV2;
+ bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
+ Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
+ Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address, then check to see if they overlap.
+ if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+ return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+ (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
+
+  // It is possible for different frame indices to alias each other, mostly
+  // when tail call optimization reuses return address slots for arguments.
+  // To catch this case, look up the actual frame offsets of the frame indices
+  // to compute the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+ (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
+ if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+ return false;
+
+  // If we know that both SrcValue1 and SrcValue2 have a relatively large
+  // alignment compared to the size and offset of the accesses, we may be able
+  // to prove they do not alias. This check is conservative for now to catch
+  // cases created by splitting vector types.
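+  // Illustrative example: two 4-byte accesses that are both 8-byte aligned,
+  // at source offsets 0 and 4, cannot overlap since 0 + 4 <= 4, so no alias
+  // is reported.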
+ if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
+ (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
+ (Op0->getMemoryVT().getSizeInBits() >> 3 ==
+ Op1->getMemoryVT().getSizeInBits() >> 3) &&
+ (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
+ int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
+ int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
+
+ // There is no overlap between these relatively aligned accesses of similar
+ // size, return no alias.
+ if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
+ (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
+ return false;
+ }
+
+ bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
+ ? CombinerGlobalAA
+ : DAG.getSubtarget().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA &&
+ Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
+ Op1->getSrcValueOffset());
+ int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
+ Op0->getSrcValueOffset() - MinOffset;
+ int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
+ Op1->getSrcValueOffset() - MinOffset;
+ AliasResult AAResult =
+ AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
+ if (AAResult == NoAlias)
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+ SmallVectorImpl<SDValue> &Aliases) {
+ SmallVector<SDValue, 8> Chains; // List of chains to visit.
+ SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
+
+ // Get alias information for node.
+ bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
+
+ // Starting off.
+ Chains.push_back(OriginalChain);
+ unsigned Depth = 0;
+
+ // Look at each chain and determine if it is an alias. If so, add it to the
+ // aliases list. If not, then continue up the chain looking for the next
+ // candidate.
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.pop_back_val();
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we reach the depth limit.
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
+ // chain we found before we hit a tokenfactor rather than the original
+ // chain.
+ if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ return;
+ }
+
+    // Don't bother if we've been here before.
+ if (!Visited.insert(Chain.getNode()).second)
+ continue;
+
+ switch (Chain.getOpcode()) {
+ case ISD::EntryToken:
+      // The entry token is the ideal chain operand, but it is handled in
+      // FindBetterChain.
+ break;
+
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for Chain.
+ bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
+ !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
+
+      // If the chain is an alias, then stop here.
+ if (!(IsLoad && IsOpLoad) &&
+ isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
+ Aliases.push_back(Chain);
+ } else {
+ // Look further up the chain.
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ }
+ break;
+ }
+
+ case ISD::TokenFactor:
+ // We have to check each of the operands of the token factor for "small"
+ // token factors, so we queue them up. Adding the operands to the queue
+ // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE).
+ if (Chain.getNumOperands() > 16) {
+ Aliases.push_back(Chain);
+ break;
+ }
+ for (unsigned n = Chain.getNumOperands(); n;)
+ Chains.push_back(Chain.getOperand(--n));
+ ++Depth;
+ break;
+
+ default:
+ // For all other instructions we will just have to take what we can get.
+ Aliases.push_back(Chain);
+ break;
+ }
+ }
+}
+
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+ SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+
+ // Accumulate all the aliases to this node.
+ GatherAllAliases(N, OldChain, Aliases);
+
+ // If no operands then chain to entry token.
+ if (Aliases.size() == 0)
+ return DAG.getEntryNode();
+
+ // If a single operand then chain to it. We don't need to revisit it.
+ if (Aliases.size() == 1)
+ return Aliases[0];
+
+ // Construct a custom tailored token factor.
+ return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
+}
+
+bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ // This holds the base pointer, index, and the offset in bytes from the base
+ // pointer.
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+
+ // We must have a base and an offset.
+ if (!BasePtr.Base.getNode())
+ return false;
+
+ // Do not handle stores to undef base pointers.
+ if (BasePtr.Base.isUndef())
+ return false;
+
+ SmallVector<StoreSDNode *, 8> ChainedStores;
+ ChainedStores.push_back(St);
+
+  // Walk up the chain and look for nodes with offsets from the same
+  // base pointer. Stop when reaching an instruction of a different kind or an
+  // instruction with a different base pointer.
+ StoreSDNode *Index = St;
+ while (Index) {
+ // If the chain has more than one use, then we can't reorder the mem ops.
+ if (Index != St && !SDValue(Index, 0)->hasOneUse())
+ break;
+
+ if (Index->isVolatile() || Index->isIndexed())
+ break;
+
+ // Find the base pointer and offset for this memory node.
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
+
+ // Check that the base pointer is the same as the original one.
+ if (!Ptr.equalBaseIndex(BasePtr))
+ break;
+
+ // Find the next memory operand in the chain. If the next operand in the
+ // chain is a store then move up and continue the scan with the next
+ // memory operand. If the next operand is a load save it and use alias
+ // information to check if it interferes with anything.
+ SDNode *NextInChain = Index->getChain().getNode();
+ while (true) {
+ if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+ // We found a store node. Use it for the next iteration.
+ if (STn->isVolatile() || STn->isIndexed()) {
+ Index = nullptr;
+ break;
+ }
+ ChainedStores.push_back(STn);
+ Index = STn;
+ break;
+ } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+ NextInChain = Ldn->getChain().getNode();
+ continue;
+ } else {
+ Index = nullptr;
+ break;
+ }
+ }
+ }
+
+ bool MadeChangeToSt = false;
+ SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
+
+ for (StoreSDNode *ChainedStore : ChainedStores) {
+ SDValue Chain = ChainedStore->getChain();
+ SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
+
+ if (Chain != BetterChain) {
+ if (ChainedStore == St)
+ MadeChangeToSt = true;
+ BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
+ }
+ }
+
+ // Do all replacements after finding the replacements to make to avoid making
+ // the chains more complicated by introducing new TokenFactors.
+ for (auto Replacement : BetterChains)
+ replaceStoreChain(Replacement.first, Replacement.second);
+
+ return MadeChangeToSt;
+}
+
+/// This is the entry point for the file.
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+ CodeGenOpt::Level OptLevel) {
+ /// This is the main entry point to this class.
+ DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 000000000000..b10da002fcfe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,2243 @@
+//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported. It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time. For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support. In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated. Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time. Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators. More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "isel"
+
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
+void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned AttrIdx) {
+ IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
+ IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+ IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+ IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+ IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+ IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
+ IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
+ IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
+ Alignment = CS->getParamAlignment(AttrIdx);
+}
+
+/// Set the current block to which generated machine instructions will be
+/// appended, and clear the local CSE map.
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ // Instructions are appended to FuncInfo.MBB. If the basic block already
+ // contains labels or copies, use the last instruction as the last local
+ // value.
+ EmitStartPt = nullptr;
+ if (!FuncInfo.MBB->empty())
+ EmitStartPt = &FuncInfo.MBB->back();
+ LastLocalValue = EmitStartPt;
+}
+
+bool FastISel::lowerArguments() {
+ if (!FuncInfo.CanLowerReturn)
+    // Fall back to the SDISel argument lowering code to deal with the sret
+    // pointer parameter.
+ return false;
+
+ if (!fastLowerArguments())
+ return false;
+
+ // Enter arguments into ValueMap for uses in non-entry BBs.
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
+ E = FuncInfo.Fn->arg_end();
+ I != E; ++I) {
+ DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I);
+ assert(VI != LocalValueMap.end() && "Missed an argument?");
+ FuncInfo.ValueMap[&*I] = VI->second;
+ }
+ return true;
+}
+
+void FastISel::flushLocalValueMap() {
+ LocalValueMap.clear();
+ LastLocalValue = EmitStartPt;
+ recomputeInsertPt();
+ SavedInsertPt = FuncInfo.InsertPt;
+}
+
+bool FastISel::hasTrivialKill(const Value *V) {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const auto *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+  // Even if the value has only one use in the LLVM IR, it is possible that
+  // FastISel has folded that use into another instruction, so that there is
+  // now more than one use at the Machine Instruction level.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg && !MRI.use_empty(Reg))
+ return false;
+
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+ EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true);
+ // Don't handle non-simple values in FastISel.
+ if (!RealVT.isSimple())
+ return 0;
+
+ // Ignore illegal types. We must do this before looking up the value
+ // in ValueMap because Arguments are given virtual registers regardless
+ // of whether FastISel can handle them.
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+ else
+ return 0;
+ }
+
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
+ if (Reg)
+ return Reg;
+
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
+
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
+}
+
+unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().getActiveBits() <= 64)
+ Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+ } else if (isa<AllocaInst>(V))
+ Reg = fastMaterializeAlloca(cast<AllocaInst>(V));
+ else if (isa<ConstantPointerNull>(V))
+ // Translate this as an integer zero so that it can be
+ // local-CSE'd with actual integer zeros.
+ Reg = getRegForValue(
+ Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+ else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ if (CF->isNullValue())
+ Reg = fastMaterializeFloatZero(CF);
+ else
+ // Try to emit the constant directly.
+ Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+ if (!Reg) {
+ // Try to emit the constant by using an integer constant with a cast.
+ const APFloat &Flt = CF->getValueAPF();
+ EVT IntVT = TLI.getPointerTy(DL);
+
+ uint64_t x[2];
+ uint32_t IntBitWidth = IntVT.getSizeInBits();
+ bool isExact;
+ (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+ APFloat::rmTowardZero, &isExact);
+ if (isExact) {
+ APInt IntVal(IntBitWidth, x);
+
+ unsigned IntegerReg =
+ getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ if (IntegerReg != 0)
+ Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
+ /*Kill=*/false);
+ }
+ }
+ } else if (const auto *Op = dyn_cast<Operator>(V)) {
+ if (!selectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !fastSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
+ } else if (isa<UndefValue>(V)) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ }
+ return Reg;
+}
+
+/// Helper for getRegForValue. This function is called when the value isn't
+/// already available in a register and must be materialized with new
+/// instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+ unsigned Reg = 0;
+ // Give the target-specific code a try first.
+ if (isa<Constant>(V))
+ Reg = fastMaterializeConstant(cast<Constant>(V));
+
+ // If target-specific code couldn't or didn't want to handle the value, then
+ // give target-independent code a try.
+ if (!Reg)
+ Reg = materializeConstant(V, VT);
+
+ // Don't cache constant materializations in the general ValueMap.
+ // To do so would require tracking what uses they dominate.
+ if (Reg) {
+ LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
+ return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+ // Look up the value to see if we already have a register for it. We
+ // cache values defined by Instructions across blocks, and other values
+ // only locally. This is because Instructions already have the SSA
+ // def-dominates-use requirement enforced.
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
+ return LocalValueMap[V];
+}
+
+void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
+ if (!isa<Instruction>(I)) {
+ LocalValueMap[I] = Reg;
+ return;
+ }
+
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
+ if (AssignedReg == 0)
+ // Use the new register.
+ AssignedReg = Reg;
+ else if (Reg != AssignedReg) {
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ for (unsigned i = 0; i < NumRegs; i++)
+ FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+
+ AssignedReg = Reg;
+ }
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+ unsigned IdxN = getRegForValue(Idx);
+ if (IdxN == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
+
+ // If the index is smaller or larger than intptr_t, truncate or extend it.
+ MVT PtrVT = TLI.getPointerTy(DL);
+ EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN,
+ IdxNIsKill);
+ IdxNIsKill = true;
+ } else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN =
+ fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) &&
+ std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DbgLoc;
+ recomputeInsertPt();
+ DbgLoc = DebugLoc();
+ SavePoint SP = {OldInsertPt, OldDL};
+ return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = &*std::prev(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DbgLoc = OldInsertPt.DL;
+}
+
+bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
+ EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !VT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // We only handle legal types. For example, on x86-32 the instruction
+ // selector contains all of the 64-bit instructions from x86-64,
+ // under the assumption that i64 won't be used if the target doesn't
+ // support it.
+ if (!TLI.isTypeLegal(VT)) {
+ // MVT::i1 is special. Allow AND, OR, or XOR because they
+ // don't require additional zeroing, which makes them easy.
+ if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+ ISDOpcode == ISD::XOR))
+ VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+ else
+ return false;
+ }
+
+ // Check if the first operand is a constant, and handle it as "ri". At -O0,
+ // we don't have anything that canonicalizes operand order.
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0)))
+ if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) {
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (!Op1)
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ unsigned ResultReg =
+ fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill,
+ CI->getZExtValue(), VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // Check if the second operand is a constant and handle it appropriately.
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ uint64_t Imm = CI->getSExtValue();
+
+ // Transform "sdiv exact X, 8" -> "sra X, 3".
+ if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) &&
+ cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) {
+ Imm = Log2_64(Imm);
+ ISDOpcode = ISD::SRA;
+ }
+
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
+ unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
+ Op0IsKill, Imm, VT.getSimpleVT());
+ if (!ResultReg)
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Check if the second operand is a constant float.
+ if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+ unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, CF);
+ if (ResultReg) {
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ unsigned Op1 = getRegForValue(I->getOperand(1));
+ if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+ // Now we have both operands in registers. Emit the instruction.
+ unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+ ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill);
+ if (!ResultReg)
+ // Target-specific code wasn't able to find a machine opcode for
+ // the given ISD opcode and type. Halt "fast" selection and bail.
+ return false;
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectGetElementPtr(const User *I) {
+ unsigned N = getRegForValue(I->getOperand(0));
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
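+  // Illustrative example: a GEP stepping through two struct fields at byte
+  // offsets 8 and 16 is emitted as a single N = N + 24 rather than two
+  // separate additions.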
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
+ MVT VT = TLI.getPointerTy(DL);
+ for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (auto *StTy = dyn_cast<StructType>(*GTI)) {
+ uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ }
+ } else {
+ Type *Ty = GTI.getIndexedType();
+
+ // If this is a constant subscript, handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ // N = N + Offset
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
+ if (TotalOffs >= MaxOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+ const CallInst *CI, unsigned StartIdx) {
+ for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ Value *Val = CI->getArgOperand(i);
+ // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+ if (const auto *C = dyn_cast<ConstantInt>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (isa<ConstantPointerNull>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(0));
+ } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+      // Values coming from a stack location also require a special encoding,
+      // but that is added later on by the target-specific frame index
+      // elimination implementation.
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ Ops.push_back(MachineOperand::CreateFI(SI->second));
+ else
+ return false;
+ } else {
+ unsigned Reg = getRegForValue(Val);
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+ return true;
+}
+
+bool FastISel::selectStackmap(const CallInst *I) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+ assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+ "Stackmap cannot return a value.");
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPS (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target-specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // CALLSEQ_START(0...)
+ // STACKMAP(id, nbytes, ...)
+ // CALLSEQ_END(0, 0)
+ //
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Push live variables for the stack map (skipping the first two arguments
+ // <id> and <numBytes>).
+ if (!addStackMapLiveVars(Ops, I, 2))
+ return false;
+
+ // We are not adding any register mask info here, because the stackmap doesn't
+ // clobber anything.
+
+ // Add scratch registers as implicit def and early clobber.
+ CallingConv::ID CC = I->getCallingConv();
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ auto Builder =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown));
+ const MCInstrDesc &MCID = Builder.getInstr()->getDesc();
+ for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I)
+ Builder.addImm(0);
+
+ // Issue STACKMAP.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::STACKMAP));
+ for (auto const &MO : Ops)
+ MIB.addOperand(MO);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ .addImm(0)
+ .addImm(0);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
+
+ return true;
+}
+
+/// \brief Lower an argument list according to the target calling convention.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
+ unsigned NumArgs, const Value *Callee,
+ bool ForceRetVoidTy, CallLoweringInfo &CLI) {
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ ImmutableCallSite CS(CI);
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext())
+ : CI->getType();
+ CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
+ const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
+ const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
+ SmallString<32> MangledName;
+ Mangler::getNameWithPrefix(MangledName, Target, DL);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+ return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs);
+}
+
+bool FastISel::selectPatchpoint(const CallInst *I) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+ CallingConv::ID CC = I->getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !I->getType()->isVoidTy();
+ Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts();
+
+ // Get the real number of arguments participating in the call <numArgs>
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) &&
+ "Expected a constant integer.");
+ const auto *NumArgsVal =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos));
+ unsigned NumArgs = NumArgsVal->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // This includes all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ CallLoweringInfo CLI;
+ CLI.setIsPatchPoint();
+ if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI))
+ return false;
+
+ assert(CLI.Call && "No call instruction specified.");
+
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add an explicit result reg if we use the anyreg calling convention.
+ if (IsAnyRegCC && HasDef) {
+ assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
+ CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
+ CLI.NumResultRegs = 1;
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+ }
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Add the call target.
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) {
+ if (C->getOpcode() == Instruction::IntToPtr) {
+ uint64_t CalleeConstAddr =
+ cast<ConstantInt>(C->getOperand(0))->getZExtValue();
+ Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr));
+ } else
+ llvm_unreachable("Unsupported ConstantExpr.");
+ } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) {
+ Ops.push_back(MachineOperand::CreateGA(GV, 0));
+ } else if (isa<ConstantPointerNull>(Callee))
+ Ops.push_back(MachineOperand::CreateImm(0));
+ else
+ llvm_unreachable("Unsupported callee address.");
+
+ // Adjust <numArgs> to account for any arguments that have been passed on
+ // the stack instead.
+ unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size();
+ Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs));
+
+ // Add the calling convention
+ Ops.push_back(MachineOperand::CreateImm((unsigned)CC));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC) {
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) {
+ unsigned Reg = getRegForValue(I->getArgOperand(i));
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+
+ // Push the arguments from the call instruction.
+ for (auto Reg : CLI.OutRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+
+ // Push live variables for the stack map.
+ if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
+ return false;
+
+ // Push the register mask info.
+ Ops.push_back(MachineOperand::CreateRegMask(
+ TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
+
+ // Add scratch registers as implicit def and early clobber.
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+ // Add implicit defs (return values).
+ for (auto Reg : CLI.InRegs)
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
+ /*IsImpl=*/true));
+
+ // Insert the patchpoint instruction before the call generated by the target.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
+ TII.get(TargetOpcode::PATCHPOINT));
+
+ for (auto &MO : Ops)
+ MIB.addOperand(MO);
+
+ MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ // Delete the original call instruction.
+ CLI.Call->eraseFromParent();
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+
+ if (CLI.NumResultRegs)
+ updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
+ return true;
+}
+
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+ Attrs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
+ unsigned NumArgs) {
+ MCContext &Ctx = MF->getContext();
+ SmallString<32> MangledName;
+ Mangler::getNameWithPrefix(MangledName, SymName, DL);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
+ return lowerCallTo(CI, Sym, NumArgs);
+}
+
+bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
+ unsigned NumArgs) {
+ ImmutableCallSite CS(CI);
+
+ FunctionType *FTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
+
+ ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) {
+ Value *V = CI->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ ArgListEntry Entry;
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, ArgI + 1);
+ Args.push_back(Entry);
+ }
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
+ // Handle the incoming return values from the call.
+ CLI.clearIns();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL);
+
+ bool CanLowerReturn = TLI.CanLowerReturn(
+ CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ // FIXME: sret demotion isn't supported yet - bail out.
+ if (!CanLowerReturn)
+ return false;
+
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.clearOuts();
+ for (auto &Arg : CLI.getArgs()) {
+ Type *FinalType = Arg.Ty;
+ if (Arg.IsByVal)
+ FinalType = cast<PointerType>(Arg.Ty)->getElementType();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+
+ ISD::ArgFlagsTy Flags;
+ if (Arg.IsZExt)
+ Flags.setZExt();
+ if (Arg.IsSExt)
+ Flags.setSExt();
+ if (Arg.IsInReg)
+ Flags.setInReg();
+ if (Arg.IsSRet)
+ Flags.setSRet();
+ if (Arg.IsSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Arg.IsSwiftError)
+ Flags.setSwiftError();
+ if (Arg.IsByVal)
+ Flags.setByVal();
+ if (Arg.IsInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling in
+ // the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Arg.IsByVal || Arg.IsInAlloca) {
+ PointerType *Ty = cast<PointerType>(Arg.Ty);
+ Type *ElementTy = Ty->getElementType();
+ unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
+ // For ByVal, the alignment should come from the frontend. The backend will
+ // guess if this info is not there, but there are cases it cannot get right.
+ unsigned FrameAlign = Arg.Alignment;
+ if (!FrameAlign)
+ FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
+ Flags.setByValSize(FrameSize);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Arg.IsNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ CLI.OutVals.push_back(Arg.Val);
+ CLI.OutFlags.push_back(Flags);
+ }
+
+ if (!fastLowerCall(CLI))
+ return false;
+
+ // Set all unused physreg defs as dead.
+ assert(CLI.Call && "No call instruction specified.");
+ CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI);
+
+ if (CLI.NumResultRegs && CLI.CS)
+ updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+
+ return true;
+}
+
+bool FastISel::lowerCall(const CallInst *CI) {
+ ImmutableCallSite CS(CI);
+
+ FunctionType *FuncTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ Entry.Val = V;
+ Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Args.push_back(Entry);
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within fastLowerCall.
+ bool IsTailCall = CI->isTailCall();
+ if (IsTailCall && !isInTailCallPosition(CS, TM))
+ IsTailCall = false;
+
+ CallLoweringInfo CLI;
+ CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
+ .setTailCall(IsTailCall);
+
+ return lowerCallTo(CLI);
+}
+
+bool FastISel::selectCall(const User *I) {
+ const CallInst *Call = cast<CallInst>(I);
+
+ // Handle simple inline asms.
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) {
+ // If the inline asm has side effects, then make sure that no local value
+ // lives across it by flushing the local value map.
+ if (IA->hasSideEffects())
+ flushLocalValueMap();
+
+ // Don't attempt to handle constraints.
+ if (!IA->getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(IA->getAsmString().c_str())
+ .addImm(ExtraInfo);
+ return true;
+ }
+
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
+ // Handle intrinsic function calls.
+ if (const auto *II = dyn_cast<IntrinsicInst>(Call))
+ return selectIntrinsicCall(II);
+
+ // Usually it does not make sense to initialize a value, make an unrelated
+ // function call, and then use the value, because it tends to be spilled on
+ // the stack. So we move the pointer to the last local value to the
+ // beginning of the block, so that all the values which have already been
+ // materialized appear after the call. It also makes sense to skip
+ // intrinsics since they tend to be inlined.
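+ // (For example, a constant materialized before this call and reused only
+ // afterwards would otherwise stay live, and likely be spilled, across the
+ // call; flushing the map avoids that by forcing it to be recomputed.)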
+ flushLocalValueMap();
+
+ return lowerCall(Call);
+}
+
+bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // The donothing intrinsic does, well, nothing.
+ case Intrinsic::donothing:
+ return true;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
+ assert(DI->getVariable() && "Missing variable");
+ if (!FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ const Value *Address = DI->getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ return true;
+ }
+
+ unsigned Offset = 0;
+ Optional<MachineOperand> Op;
+ if (const auto *Arg = dyn_cast<Argument>(Address))
+ // Some arguments' frame index is recorded during argument lowering.
+ Offset = FuncInfo.getArgumentFrameIndex(Arg);
+ if (Offset)
+ Op = MachineOperand::CreateFI(Offset);
+ if (!Op)
+ if (unsigned Reg = lookUpRegForValue(Address))
+ Op = MachineOperand::CreateReg(Reg, false);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which runs counter to what selection DAG isel expects.
+ if (!Op && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
+ false);
+
+ if (Op) {
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
+ if (Op->isReg()) {
+ Op->setIsDebug(true);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
+ DI->getVariable(), DI->getExpression());
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(*Op)
+ .addImm(0)
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ }
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst *DI = cast<DbgValueInst>(II);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const Value *V = DI->getValue();
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(0U)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addCImm(CI)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addImm(CI->getZExtValue())
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addFPImm(CF)
+ .addImm(DI->getOffset())
+ .addMetadata(DI->getVariable())
+ .addMetadata(DI->getExpression());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ // FIXME: This does not handle register-indirect values at offset 0.
+ bool IsIndirect = DI->getOffset() != 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
+ DI->getOffset(), DI->getVariable(), DI->getExpression());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ }
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
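+ // The second argument is the 'min' flag: when it is false we conservatively
+ // return -1 (no known upper bound), otherwise 0 (no known lower bound).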
+ unsigned long long Res = CI->isZero() ? -1ULL : 0;
+ Constant *ResCI = ConstantInt::get(II->getType(), Res);
+ unsigned ResultReg = getRegForValue(ResCI);
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::expect: {
+ unsigned ResultReg = getRegForValue(II->getArgOperand(0));
+ if (!ResultReg)
+ return false;
+ updateValueMap(II, ResultReg);
+ return true;
+ }
+ case Intrinsic::experimental_stackmap:
+ return selectStackmap(II);
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ return selectPatchpoint(II);
+ }
+
+ return fastLowerIntrinsicCall(II);
+}
+
+bool FastISel::selectCast(const User *I, unsigned Opcode) {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other ||
+ !DstVT.isSimple())
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ // Check if the destination type is legal.
+ if (!TLI.isTypeLegal(DstVT))
+ return false;
+
+ // Check if the source operand is legal.
+ if (!TLI.isTypeLegal(SrcVT))
+ return false;
+
+ unsigned InputReg = getRegForValue(I->getOperand(0));
+ if (!InputReg)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+ unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+ Opcode, InputReg, InputRegIsKill);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectBitCast(const User *I) {
+ // If the bitcast doesn't change the type, just use the operand value.
+ if (I->getType() == I->getOperand(0)->getType()) {
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
+ EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstEVT = TLI.getValueType(DL, I->getType());
+ if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
+ !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT))
+ // Unhandled type. Halt "fast" selection and bail.
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DstVT = DstEVT.getSimpleVT();
+ unsigned Op0 = getRegForValue(I->getOperand(0));
+ if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+ // First, try to perform the bitcast by inserting a reg-reg copy.
+ unsigned ResultReg = 0;
+ if (SrcVT == DstVT) {
+ const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
+ }
+ }
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
+ if (!ResultReg)
+ ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill);
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+// Return true if we should copy from swift error to the final vreg as specified
+// by SwiftErrorWorklist.
+static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI,
+ FunctionLoweringInfo &FuncInfo) {
+ if (!TLI.supportSwiftError())
+ return false;
+ return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB);
+}
+
+// Remove local value instructions starting from the instruction after
+// SavedLastLocalValue to the current function insert point.
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) {
+ MachineInstr *CurLastLocalValue = getLastLocalValue();
+ if (CurLastLocalValue != SavedLastLocalValue) {
+ // Find the first local value instruction to be deleted.
+ // This is the instruction after SavedLastLocalValue if it is non-NULL.
+ // Otherwise it's the first instruction in the block.
+ MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
+ if (SavedLastLocalValue)
+ ++FirstDeadInst;
+ else
+ FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
+ setLastLocalValue(SavedLastLocalValue);
+ removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
+ }
+}
+
+bool FastISel::selectInstruction(const Instruction *I) {
+ MachineInstr *SavedLastLocalValue = getLastLocalValue();
+ // Just before the terminator instruction, insert instructions to
+ // feed PHI nodes in successor blocks.
+ if (isa<TerminatorInst>(I)) {
+ // If we need to materialize any vreg from worklist, we bail out of
+ // FastISel.
+ if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo))
+ return false;
+ if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
+ // PHI node handling may have generated local value instructions,
+ // even though it failed to handle all PHI nodes.
+ // We remove these instructions because SelectionDAGISel will generate
+ // them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
+ return false;
+ }
+ }
+
+ // FastISel does not handle any operand bundles except OB_funclet.
+ if (ImmutableCallSite CS = ImmutableCallSite(I))
+ for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i)
+ if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet)
+ return false;
+
+ DbgLoc = I->getDebugLoc();
+
+ SavedInsertPt = FuncInfo.InsertPt;
+
+ if (const auto *Call = dyn_cast<CallInst>(I)) {
+ const Function *F = Call->getCalledFunction();
+ LibFunc::Func Func;
+
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func))
+ return false;
+
+ // Don't handle Intrinsic::trap if a trap function is specified.
+ if (F && F->getIntrinsicID() == Intrinsic::trap &&
+ Call->hasFnAttr("trap-func-name"))
+ return false;
+ }
+
+ // First, try doing target-independent selection.
+ if (!SkipTargetIndependentISel) {
+ if (selectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
+ DbgLoc = DebugLoc();
+ return true;
+ }
+ // Remove dead code.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ SavedInsertPt = FuncInfo.InsertPt;
+ }
+ // Next, try calling the target to attempt to handle the instruction.
+ if (fastSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
+ DbgLoc = DebugLoc();
+ return true;
+ }
+ // Remove dead code.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+
+ DbgLoc = DebugLoc();
+ // Undo phi node updates, because they will be added again by SelectionDAG.
+ if (isa<TerminatorInst>(I)) {
+ // PHI node handling may have generated local value instructions.
+ // We remove them because SelectionDAGISel will generate them again.
+ removeDeadLocalValueCode(SavedLastLocalValue);
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ }
+ return false;
+}
+
+/// Emit an unconditional branch to the given block, unless it is the immediate
+/// (fall-through) successor, and update the CFG.
+void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
+ const DebugLoc &DbgLoc) {
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // If this were the only instruction in the block, we would emit the branch
+ // anyway for more accurate line information; otherwise this is the
+ // unconditional fall-through case, which needs no instructions.
+ } else {
+ // The unconditional branch case.
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
+ SmallVector<MachineOperand, 0>(), DbgLoc);
+ }
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(MSucc);
+}
+
+void FastISel::finishCondBranch(const BasicBlock *BranchBB,
+ MachineBasicBlock *TrueMBB,
+ MachineBasicBlock *FalseMBB) {
+ // Add TrueMBB as a successor unless it is equal to FalseMBB: this can happen
+ // in degenerate IR, and MachineIR forbids having a block appear twice in the
+ // successor/predecessor lists.
+ if (TrueMBB != FalseMBB) {
+ if (FuncInfo.BPI) {
+ auto BranchProbability =
+ FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB);
+ }
+
+ fastEmitBranch(FalseMBB, DbgLoc);
+}
+
+/// Emit an FNeg operation.
+bool FastISel::selectFNeg(const User *I) {
+ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+ if (!OpReg)
+ return false;
+ bool OpRegIsKill = hasTrivialKill(I);
+
+ // If the target has ISD::FNEG, use it.
+ EVT VT = TLI.getValueType(DL, I->getType());
+ unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG,
+ OpReg, OpRegIsKill);
+ if (ResultReg) {
+ updateValueMap(I, ResultReg);
+ return true;
+ }
+
+ // Bitcast the value to integer, twiddle the sign bit with xor,
+ // and then bitcast it back to floating-point.
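+ // For example, for an f64 the xor constant below is 1 << 63, i.e.
+ // 0x8000000000000000, which flips only the sign bit.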
+ if (VT.getSizeInBits() > 64)
+ return false;
+ EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+ if (!TLI.isTypeLegal(IntVT))
+ return false;
+
+ unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+ ISD::BITCAST, OpReg, OpRegIsKill);
+ if (!IntReg)
+ return false;
+
+ unsigned IntResultReg = fastEmit_ri_(
+ IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
+ UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
+ if (!IntResultReg)
+ return false;
+
+ ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
+ IntResultReg, /*IsKill=*/true);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
+bool FastISel::selectExtractValue(const User *U) {
+ const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U);
+ if (!EVI)
+ return false;
+
+ // Make sure we only try to handle extracts with a legal result. But also
+ // allow i1 because it's easy.
+ EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true);
+ if (!RealVT.isSimple())
+ return false;
+ MVT VT = RealVT.getSimpleVT();
+ if (!TLI.isTypeLegal(VT) && VT != MVT::i1)
+ return false;
+
+ const Value *Op0 = EVI->getOperand(0);
+ Type *AggTy = Op0->getType();
+
+ // Get the base result register.
+ unsigned ResultReg;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0);
+ if (I != FuncInfo.ValueMap.end())
+ ResultReg = I->second;
+ else if (isa<Instruction>(Op0))
+ ResultReg = FuncInfo.InitializeRegForValue(Op0);
+ else
+ return false; // fast-isel can't handle aggregate constants at the moment
+
+ // Get the actual result register, which is an offset from the base register.
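+ // For example (illustrative), extracting index {1, 1} from {i32, {i64, float}}
+ // yields linear index 2, so ResultReg is advanced past the registers used for
+ // the i32 and the i64 members.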
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
+
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, DL, AggTy, AggValueVTs);
+
+ for (unsigned i = 0; i < VTIndex; i++)
+ ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]);
+
+ updateValueMap(EVI, ResultReg);
+ return true;
+}
+
+bool FastISel::selectOperator(const User *I, unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Add:
+ return selectBinaryOp(I, ISD::ADD);
+ case Instruction::FAdd:
+ return selectBinaryOp(I, ISD::FADD);
+ case Instruction::Sub:
+ return selectBinaryOp(I, ISD::SUB);
+ case Instruction::FSub:
+ // FNeg is currently represented in LLVM IR as a special case of FSub.
+ if (BinaryOperator::isFNeg(I))
+ return selectFNeg(I);
+ return selectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return selectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return selectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return selectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return selectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return selectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return selectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return selectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return selectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return selectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return selectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return selectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return selectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return selectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return selectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ if (TM.Options.TrapUnreachable)
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ else
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return selectCall(I);
+
+ case Instruction::BitCast:
+ return selectBitCast(I);
+
+ case Instruction::FPToSI:
+ return selectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return selectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return selectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return selectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return selectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return selectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return selectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return selectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+
+FastISel::~FastISel() {}
+
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
+ return false;
+}
+
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
+
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/, uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// This method is a wrapper of fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// fastEmit_rr instead.
+unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
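+ // mul x, 16 -> shl x, 4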
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed): check that shift amounts are in range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg)
+ return ResultReg;
+ unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ bool IsImmKill = true;
+ if (!MaterialReg) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ if (!MaterialReg)
+ return 0;
+ // FIXME: If the materialized register here has no uses yet then this
+ // will be the first use and we should be able to mark it as killed.
+ // However, the local value area for materialising constant expressions
+ // grows down, not up, which means that any constant expressions we generate
+ // later which also use 'Imm' could be after this instruction and therefore
+ // after this kill.
+ IsImmKill = false;
+ }
+ return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill);
+}
+
+unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
+ return MRI.createVirtualRegister(RC);
+}
+
+unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+ unsigned OpNum) {
+ if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
+unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, unsigned Op2,
+ bool Op2IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm1,
+ uint64_t Imm2) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addImm(Imm1)
+ .addImm(Imm2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ const ConstantFP *FPImm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addFPImm(FPImm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addFPImm(FPImm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1,
+ bool Op1IsKill, uint64_t Imm) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addImm(Imm);
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
+unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
+ bool Op0IsKill, uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ const TargetRegisterClass *RC = MRI.getRegClass(Op0);
+ MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx);
+ return ResultReg;
+}
+
+/// Emit MachineInstrs to compute the value of Op with all but the least
+/// significant bit set to zero.
+unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
+}
+
+/// handlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+/// Emit code to ensure constants are copied into registers when needed.
+/// Remember the virtual registers that need to be added to the Machine PHI
+/// nodes as input. We cannot just directly add them, because expansion
+/// might result in multiple MBB's for one BB. As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+ FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+ // Check successor nodes' PHI nodes that expect a constant to be available
+ // from this block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin()))
+ continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const auto *PN = dyn_cast<PHINode>(I); ++I) {
+
+ // Ignore dead PHIs.
+ if (PN->use_empty())
+ continue;
+
+ // Only handle legal types. Two interesting things to note here. First,
+ // by bailing out early, we may leave behind some dead instructions,
+ // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+ // own moves. Second, this check is necessary because FastISel doesn't
+ // use CreateRegs to create registers, so it always creates
+ // exactly one register for each non-void instruction.
+ EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
+ if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+ // Handle integer promotions, though, because they're common and easy.
+ if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ return false;
+ }
+ }
+
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DbgLoc = PN->getDebugLoc();
+ if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
+ DbgLoc = Inst->getDebugLoc();
+
+ unsigned Reg = getRegForValue(PHIOp);
+ if (!Reg) {
+ FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
+ return false;
+ }
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg));
+ DbgLoc = DebugLoc();
+ }
+ }
+
+ return true;
+}
+
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+ assert(LI->hasOneUse() &&
+ "tryToFoldLoad expected a LoadInst with a single use");
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
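+ // (Illustrative chain: 'load -> sext -> add', with the add being FoldInst; we
+ // walk through the single-use sext to reach it.)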
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->user_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->user_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile())
+ return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = getRegForValue(LI);
+ if (!LoadReg)
+ return false;
+
+ // We can't fold if this vreg has no uses or more than one use. Multiple uses
+ // may mean that the instruction got lowered to multiple MIs, or the use of
+ // the loaded value ended up being multiple operands of the result.
+ if (!MRI.hasOneUse(LoadReg))
+ return false;
+
+ MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+ MachineInstr *User = RI->getParent();
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo.InsertPt = User;
+ FuncInfo.MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
+bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
+ // Must be an add.
+ if (!isa<AddOperator>(Add))
+ return false;
+ // Type size needs to match.
+ if (DL.getTypeSizeInBits(GEP->getType()) !=
+ DL.getTypeSizeInBits(Add->getType()))
+ return false;
+ // Must be in the same basic block.
+ if (isa<Instruction>(Add) &&
+ FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
+ return false;
+ // Must have a constant operand.
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
+}
+
+MachineMemOperand *
+FastISel::createMachineMemOperandFor(const Instruction *I) const {
+ const Value *Ptr;
+ Type *ValTy;
+ unsigned Alignment;
+ MachineMemOperand::Flags Flags;
+ bool IsVolatile;
+
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ Alignment = LI->getAlignment();
+ IsVolatile = LI->isVolatile();
+ Flags = MachineMemOperand::MOLoad;
+ Ptr = LI->getPointerOperand();
+ ValTy = LI->getType();
+ } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ Alignment = SI->getAlignment();
+ IsVolatile = SI->isVolatile();
+ Flags = MachineMemOperand::MOStore;
+ Ptr = SI->getPointerOperand();
+ ValTy = SI->getValueOperand()->getType();
+ } else
+ return nullptr;
+
+ bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
+ bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
+
+ AAMDNodes AAInfo;
+ I->getAAMetadata(AAInfo);
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlignment(ValTy);
+
+ unsigned Size = DL.getTypeStoreSize(ValTy);
+
+ if (IsVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (IsNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (IsInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
+ Alignment, AAInfo, Ranges);
+}
+
+CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 000000000000..e669ffc3d02a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,615 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "function-lowering-info"
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+ if (I->use_empty()) return false;
+ if (isa<PHINode>(I)) return true;
+ const BasicBlock *BB = I->getParent();
+ for (const User *U : I->users())
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+ return true;
+
+ return false;
+}
+
+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+ // If the source value is used by compare instructions and the number of
+ // signed predicates among those users is greater than the number of
+ // unsigned predicates, we prefer to use SIGN_EXTEND.
+ //
+ // With this optimization, we would be able to reduce some redundant sign or
+ // zero extension instruction, and eventually more machine CSE opportunities
+ // can be exposed.
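+ // For example, a value feeding two 'icmp slt' users and one 'icmp ult' user
+ // has NumOfSigned == 2 > NumOfUnsigned == 1, so SIGN_EXTEND is chosen.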
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ unsigned NumOfSigned = 0, NumOfUnsigned = 0;
+ for (const User *U : V->users()) {
+ if (const auto *CI = dyn_cast<CmpInst>(U)) {
+ NumOfSigned += CI->isSigned();
+ NumOfUnsigned += CI->isUnsigned();
+ }
+ }
+ if (NumOfSigned > NumOfUnsigned)
+ ExtendKind = ISD::SIGN_EXTEND;
+
+ return ExtendKind;
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
+ SelectionDAG *DAG) {
+ Fn = &fn;
+ MF = &mf;
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ MachineModuleInfo &MMI = MF->getMMI();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI,
+ mf.getDataLayout());
+ CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
+ Fn->isVarArg(), Outs, Fn->getContext());
+
+ // If this personality uses funclets, we need to do a bit more work.
+ DenseMap<const AllocaInst *, int *> CatchObjects;
+ EHPersonality Personality = classifyEHPersonality(
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr);
+ if (isFuncletEHPersonality(Personality)) {
+ // Calculate state numbers if we haven't already.
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+ if (Personality == EHPersonality::MSVC_CXX)
+ calculateWinCXXEHStateNumbers(&fn, EHInfo);
+ else if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&fn, EHInfo);
+ else if (Personality == EHPersonality::CoreCLR)
+ calculateClrEHStateNumbers(&fn, EHInfo);
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (const AllocaInst *AI = H.CatchObj.Alloca)
+ CatchObjects.insert({AI, &H.CatchObj.FrameIndex});
+ else
+ H.CatchObj.FrameIndex = INT_MAX;
+ }
+ }
+ }
+
+ // Initialize the mapping of values to registers. This is only set up for
+ // instruction values that are used outside of the block that defines
+ // them.
+ Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+ for (; BB != EB; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ Type *Ty = AI->getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
+ AI->getAlignment());
+
+ // Static allocas can be folded into the initial stack frame
+ // adjustment. For targets that don't realign the stack, don't
+ // do this if there is an extra alignment requirement.
+ if (AI->isStaticAlloca() &&
+ (TFI->isStackRealignable() || (Align <= StackAlign))) {
+ const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
+ uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
+
+ TySize *= CUI->getZExtValue(); // Get total allocated size.
+ if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+ int FrameIndex = INT_MAX;
+ auto Iter = CatchObjects.find(AI);
+ if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
+ FrameIndex = MF->getFrameInfo()->CreateFixedObject(
+ TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
+ MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align);
+ } else {
+ FrameIndex =
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+ }
+
+ StaticAllocaMap[AI] = FrameIndex;
+ // Update the catch handler information.
+ if (Iter != CatchObjects.end())
+ *Iter->second = FrameIndex;
+ } else {
+ // FIXME: Overaligned static allocas should be grouped into
+ // a single dynamic allocation instead of using a separate
+ // stack allocation for each one.
+ if (Align <= StackAlign)
+ Align = 0;
+ // Inform the Frame Information that we have variable-sized objects.
+ MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
+ }
+ }
+
+ // Look for inline asm that clobbers the SP register.
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ ImmutableCallSite CS(&*I);
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ std::vector<TargetLowering::AsmOperandInfo> Ops =
+ TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
+ for (size_t I = 0, E = Ops.size(); I != E; ++I) {
+ TargetLowering::AsmOperandInfo &Op = Ops[I];
+ if (Op.Type == InlineAsm::isClobber) {
+ // Clobbers don't have SDValue operands, hence SDValue().
+ TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
+ Op.ConstraintVT);
+ if (PhysReg.first == SP)
+ MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ }
+ }
+ }
+ }
+
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo()->setHasVAStart(true);
+ }
+
+ // If we have a musttail call in a variadic function, we need to ensure we
+ // forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
+ }
+
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
+ if (isUsedOutsideOfDefiningBlock(&*I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I)))
+ InitializeRegForValue(&*I);
+
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ assert(DI->getVariable() && "Missing variable");
+ assert(DI->getDebugLoc() && "Missing location");
+ if (MMI.hasDebugInfo()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+
+ // Decide the preferred extend type for a value.
+ PreferredExtendType[&*I] = getPreferredExtendForValue(&*I);
+ }
+
+ // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+ // also creates the initial PHI MachineInstrs, though none of the input
+ // operands are populated.
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
+ // are really data, and no instructions can live here.
+ if (BB->isEHPad()) {
+ const Instruction *I = BB->getFirstNonPHI();
+ // If this is a non-landingpad EH pad, mark this function as using
+ // funclets.
+ // FIXME: SEH catchpads do not create funclets, so we could avoid setting
+ // this in such cases in order to improve frame layout.
+ if (!isa<LandingPadInst>(I)) {
+ MMI.setHasEHFunclets(true);
+ MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ }
+ if (isa<CatchSwitchInst>(I)) {
+ assert(&*BB->begin() == I &&
+ "WinEHPrepare failed to remove PHIs from imaginary BBs");
+ continue;
+ }
+ if (isa<FuncletPadInst>(I))
+ assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs");
+ }
+
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB);
+ MBBMap[&*BB] = MBB;
+ MF->push_back(MBB);
+
+ // Transfer the address-taken flag. This is necessary because there could
+ // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+ // the first one should be marked.
+ if (BB->hasAddressTaken())
+ MBB->setHasAddressTaken();
+
+ // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+ // appropriate.
+ for (BasicBlock::const_iterator I = BB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ DebugLoc DL = PN->getDebugLoc();
+ unsigned PHIReg = ValueMap[PN];
+ assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ for (unsigned i = 0; i != NumRegisters; ++i)
+ BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+ PHIReg += NumRegisters;
+ }
+ }
+ }
+
+ // Mark landing pad blocks.
+ SmallVector<const LandingPadInst *, 4> LPads;
+ for (BB = Fn->begin(); BB != EB; ++BB) {
+ const Instruction *FNP = BB->getFirstNonPHI();
+ if (BB->isEHPad() && MBBMap.count(&*BB))
+ MBBMap[&*BB]->setIsEHPad();
+ if (const auto *LPI = dyn_cast<LandingPadInst>(FNP))
+ LPads.push_back(LPI);
+ }
+
+ if (!isFuncletEHPersonality(Personality))
+ return;
+
+ WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo();
+
+ // Map all BB references in the WinEH data to MBBs.
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
+ for (WinEHHandlerType &H : TBME.HandlerArray) {
+ if (H.Handler)
+ H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ }
+ }
+ for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
+ if (UME.Cleanup)
+ UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
+ const BasicBlock *BB = UME.Handler.get<const BasicBlock *>();
+ UME.Handler = MBBMap[BB];
+ }
+ for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
+ const BasicBlock *BB = CME.Handler.get<const BasicBlock *>();
+ CME.Handler = MBBMap[BB];
+ }
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+ MBBMap.clear();
+ ValueMap.clear();
+ StaticAllocaMap.clear();
+ LiveOutRegInfo.clear();
+ VisitedBBs.clear();
+ ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
+ RegFixups.clear();
+ StatepointStackSlots.clear();
+ StatepointSpillMaps.clear();
+ PreferredExtendType.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+ return RegInfo->createVirtualRegister(
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+
+ unsigned FirstReg = 0;
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
+
+ unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ unsigned R = CreateReg(RegisterVT);
+ if (!FirstReg) FirstReg = R;
+ }
+ }
+ return FirstReg;
+}
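+
+// Illustrative example (hypothetical 32-bit target where i64 is expanded into
+// two i32 parts): for a value of type {i64, i32}, ComputeValueVTs yields
+// {i64, i32}, getNumRegisters reports 2 and 1 respectively, and CreateRegs
+// creates three consecutive virtual registers, returning the first of them.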
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+ if (!LiveOutRegInfo.inBounds(Reg))
+ return nullptr;
+
+ LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+ if (!LOI->IsValid)
+ return nullptr;
+
+ if (BitWidth > LOI->KnownZero.getBitWidth()) {
+ LOI->NumSignBits = 1;
+ LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+ LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+ }
+
+ return LOI;
+}
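+
+// Illustrative example: if a register was recorded at 8 bits with
+// KnownZero == 0xF0 and KnownOne == 0x03, a caller requesting 16 bits sees the
+// masks zero-extended to 0x00F0 and 0x0003 and NumSignBits conservatively
+// reset to 1, matching the zero-extension convention described above.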
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+ Type *Ty = PN->getType();
+ if (!Ty->isIntegerTy() || Ty->isVectorTy())
+ return;
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs);
+ assert(ValueVTs.size() == 1 &&
+ "PHIs with non-vector integer types should have a single VT.");
+ EVT IntVT = ValueVTs[0];
+
+ if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1)
+ return;
+ IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
+ unsigned BitWidth = IntVT.getSizeInBits();
+
+ unsigned DestReg = ValueMap[PN];
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ return;
+ LiveOutRegInfo.grow(DestReg);
+ LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+ Value *V = PN->getIncomingValue(0);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = Val.getNumSignBits();
+ DestLOI.KnownZero = ~Val;
+ DestLOI.KnownOne = Val;
+ } else {
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI = *SrcLOI;
+ }
+
+ assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+ DestLOI.KnownOne.getBitWidth() == BitWidth &&
+ "Masks should have the same bit width as the type.");
+
+ for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = PN->getIncomingValue(i);
+ if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+ DestLOI.NumSignBits = 1;
+ APInt Zero(BitWidth, 0);
+ DestLOI.KnownZero = Zero;
+ DestLOI.KnownOne = Zero;
+ return;
+ }
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+ DestLOI.KnownZero &= ~Val;
+ DestLOI.KnownOne &= Val;
+ continue;
+ }
+
+ assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+ "its CopyToReg node was created.");
+ unsigned SrcReg = ValueMap[V];
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+ if (!SrcLOI) {
+ DestLOI.IsValid = false;
+ return;
+ }
+ DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+ DestLOI.KnownZero &= SrcLOI->KnownZero;
+ DestLOI.KnownOne &= SrcLOI->KnownOne;
+ }
+}
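+
+// Illustrative example: for an integer PHI whose incoming values are the
+// constants 0x0F and 0x03 (zero-extended to the transformed bit width above),
+// the merge loop leaves KnownOne == 0x03 and KnownZero == ~0x0F, so only bits
+// 2 and 3 of the destination remain unknown.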
+
+/// setArgumentFrameIndex - Record the frame index for the byval
+/// argument. This overrides the previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
+ int FI) {
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then 0 is
+/// returned.
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
+ return 0;
+}
+
+unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
+ const Value *CPI, const TargetRegisterClass *RC) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto I = CatchPadExceptionPointers.insert({CPI, 0});
+ unsigned &VReg = I.first->second;
+ if (I.second)
+ VReg = MRI.createVirtualRegister(RC);
+ assert(VReg && "null vreg in exception pointer table!");
+ return VReg;
+}
+
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (auto i : post_order(T)) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
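+
+// Illustrative example: a variadic call such as printf("%f", someDouble)
+// passes a floating-point argument, so the walk above sets
+// usesVAFloatArgument and the _fltused reference described in the comment is
+// emitted when targeting Windows.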
+
+/// AddLandingPadInfo - Extract the exception handling information from the
+/// landingpad instruction and add them to the specified machine module info.
+void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
+ MachineBasicBlock *MBB) {
+ if (const auto *PF = dyn_cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
+ MMI.addPersonality(PF);
+
+ if (I.isCleanup())
+ MMI.addCleanup(MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MMI.addCatchTypeInfo(MBB,
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalValue*, 4> FilterList;
+ for (User::op_iterator
+ II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+
+ MMI.addFilterTypeInfo(MBB, FilterList);
+ }
+ }
+}
+
+unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB,
+ const Value* Val) const {
+ // Find the index in SwiftErrorVals.
+ SwiftErrorValues::const_iterator I =
+ std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
+ assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
+ return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()];
+}
+
+void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB,
+ const Value* Val, unsigned VReg) {
+ // Find the index in SwiftErrorVals.
+ SwiftErrorValues::iterator I =
+ std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
+ assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
+ SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 000000000000..c8af73a3b441
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,1050 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "instr-emitter"
+
+/// MinRCSize - Smallest register class we allow when constraining virtual
+/// registers. If satisfying all register class constraints would require
+/// using a smaller register class, emit a COPY to a new virtual register
+/// instead.
+const unsigned MinRCSize = 4;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+ unsigned N = Node->getNumValues();
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
+ --N;
+ if (N && Node->getValueType(N - 1) == MVT::Other)
+ --N; // Skip over chain result.
+ return N;
+}
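+
+// Illustrative example: a machine node whose value list is (i32, f64, ch,
+// glue) reports 2 from CountResults -- the trailing glue and chain values are
+// stripped before counting.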
+
+/// countOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+///
+/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding
+/// the chain and glue. These operands may be implicit on the machine instr.
+static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
+ unsigned &NumImpUses) {
+ unsigned N = Node->getNumOperands();
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+ --N;
+ if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+ --N; // Ignore chain if it exists.
+
+ // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses.
+ NumImpUses = N - NumExpUses;
+ for (unsigned I = N; I > NumExpUses; --I) {
+ if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
+ continue;
+ if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
+ if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ continue;
+ NumImpUses = N - I;
+ break;
+ }
+
+ return N;
+}
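+
+// Illustrative example: a call-like node with operands (target-symbol, %vreg,
+// physical RegisterSDNode, RegisterMaskSDNode, chain, glue) and
+// NumExpUses == 2 yields N == 4 (chain and glue are dropped) and
+// NumImpUses == 2 for the trailing physreg and register-mask operands.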
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+ unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VRBase = 0;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Just use the input register directly!
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ return;
+ }
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ bool MatchReg = true;
+ const TargetRegisterClass *UseRC = nullptr;
+ MVT VT = Node->getSimpleValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
+ if (!IsClone && !IsCloned)
+ for (SDNode *User : Node->uses()) {
+ bool Match = true;
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ Match = false;
+ } else if (DestReg != SrcReg)
+ Match = false;
+ } else {
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ SDValue Op = User->getOperand(i);
+ if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ continue;
+ MVT VT = Node->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Other || VT == MVT::Glue)
+ continue;
+ Match = false;
+ if (User->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
+ const TargetRegisterClass *RC = nullptr;
+ if (i+II.getNumDefs() < II.getNumOperands()) {
+ RC = TRI->getAllocatableClass(
+ TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
+ }
+ if (!UseRC)
+ UseRC = RC;
+ else if (RC) {
+ const TargetRegisterClass *ComRC =
+ TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
+ // If multiple uses expect disjoint register classes, we emit
+ // copies in AddRegisterOperand.
+ if (ComRC)
+ UseRC = ComRC;
+ }
+ }
+ }
+ }
+ MatchReg &= Match;
+ if (VRBase)
+ break;
+ }
+
+ const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+ // Figure out the register class to create for the destreg.
+ if (VRBase) {
+ DstRC = MRI->getRegClass(VRBase);
+ } else if (UseRC) {
+ assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ DstRC = UseRC;
+ } else {
+ DstRC = TLI->getRegClassFor(VT);
+ }
+
+ // If all uses are reading from the src physical register and copying the
+ // register is either impossible or very expensive, then don't create a copy.
+ if (MatchReg && SrcRC->getCopyCost() < 0) {
+ VRBase = SrcReg;
+ } else {
+ // Create the reg, emit the copy.
+ VRBase = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
+ }
+
+ SDValue Op(Node, ResNo);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+/// of the node is a CopyToReg, return its destination register. Return 0
+/// otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const {
+ if (!Node->hasOneUse())
+ return 0;
+
+ SDNode *User = *Node->use_begin();
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == ResNo) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return Reg;
+ }
+ return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+ "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+ unsigned NumResults = CountResults(Node);
+ for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+ // If the specific node value is only used by a CopyToReg and the dest reg
+ // is a vreg in the same register class, use the CopyToReg'd destination
+ // register instead of creating a new vreg.
+ unsigned VRBase = 0;
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
+ // Always let the value type influence the used register class. The
+ // constraints on the instruction may be too lax to represent the value
+ // type correctly. For example, a 64-bit float (X86::FR64) can't live in
+ // the 32-bit float super-class (X86::FR32).
+ if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
+ const TargetRegisterClass *VTRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(i));
+ if (RC)
+ VTRC = TRI->getCommonSubClass(RC, VTRC);
+ if (VTRC)
+ RC = VTRC;
+ }
+
+ if (II.OpInfo[i].isOptionalDef()) {
+ // Optional def must be a physical register.
+ unsigned NumResults = CountResults(Node);
+ VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ if (!VRBase && !IsClone && !IsCloned)
+ for (SDNode *User : Node->uses()) {
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node &&
+ User->getOperand(2).getResNo() == i) {
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+ if (RegRC == RC) {
+ VRBase = Reg;
+ MIB.addReg(VRBase, RegState::Define);
+ break;
+ }
+ }
+ }
+ }
+
+ // Create the result registers for this node and add the result regs to
+ // the machine instruction.
+ if (VRBase == 0) {
+ assert(RC && "Isn't a register operand!");
+ VRBase = MRI->createVirtualRegister(RC);
+ MIB.addReg(VRBase, RegState::Define);
+ }
+
+ // If this def corresponds to a result of the SDNode insert the VRBase into
+ // the lookup map.
+ if (i < NumResults) {
+ SDValue Op(Node, i);
+ if (IsClone)
+ VRBaseMap.erase(Op);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ }
+ }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Add an IMPLICIT_DEF instruction before every use.
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
+ // does not include operand register class info.
+ if (!VReg) {
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(Op.getSimpleValueType());
+ VReg = MRI->createVirtualRegister(RC);
+ }
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ return VReg;
+ }
+
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+ return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ // Get/emit the operand.
+ unsigned VReg = getVR(Op, VRBaseMap);
+
+ const MCInstrDesc &MCID = MIB->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
+
+ // If the instruction requires a register in a different class, create
+ // a new virtual register and copy the value into it, but first attempt to
+ // shrink VReg's register class within reason. For example, if VReg == GR32
+ // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
+ if (II) {
+ const TargetRegisterClass *DstRC = nullptr;
+ if (IIOpNum < II->getNumOperands())
+ DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
+ assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) &&
+ "Expected VReg");
+ if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ }
+ }
+
+ // If this value has only one use, that use is a kill. This is a
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+ // Avoid kill flags on Schedule cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MIB->getNumOperands();
+ while (Idx > 0 &&
+ MIB->getOperand(Idx-1).isReg() &&
+ MIB->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
+
+ MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) |
+ getDebugRegState(IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding.
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned) {
+ if (Op.isMachineOpcode()) {
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ MIB.addImm(C->getSExtValue());
+ } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+ MIB.addFPImm(F->getConstantFPValue());
+ } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+ // Turn additional physreg operands into implicit uses on non-variadic
+ // instructions. This is used by call and return instructions passing
+ // arguments in registers.
+ bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic());
+ MIB.addReg(R->getReg(), getImplRegState(Imp));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MIB.addRegMask(RM->getRegMask());
+ } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+ MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(),
+ TGA->getTargetFlags());
+ } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+ MIB.addMBB(BBNode->getBasicBlock());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+ MIB.addFrameIndex(FI->getIndex());
+ } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+ MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags());
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+ int Offset = CP->getOffset();
+ unsigned Align = CP->getAlignment();
+ Type *Type = CP->getType();
+ // MachineConstantPool wants an explicit alignment.
+ if (Align == 0) {
+ Align = MF->getDataLayout().getPrefTypeAlignment(Type);
+ if (Align == 0) {
+ // Alignment of vector types. FIXME!
+ Align = MF->getDataLayout().getTypeAllocSize(Type);
+ }
+ }
+
+ unsigned Idx;
+ MachineConstantPool *MCP = MF->getConstantPool();
+ if (CP->isMachineConstantPoolEntry())
+ Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+ else
+ Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+ MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags());
+ } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+ MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags());
+ } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) {
+ MIB.addSym(SymNode->getMCSymbol());
+ } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+ MIB.addBlockAddress(BA->getBlockAddress(),
+ BA->getOffset(),
+ BA->getTargetFlags());
+ } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) {
+ MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags());
+ } else {
+ assert(Op.getValueType() != MVT::Other &&
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
+ AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
+ }
+}
+
+unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
+ MVT VT, const DebugLoc &DL) {
+ const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
+
+ // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg
+ // within reason.
+ if (RC && RC != VRC)
+ RC = MRI->constrainRegClass(VReg, RC, MinRCSize);
+
+ // VReg has been adjusted. It can be used with SubIdx operands now.
+ if (RC)
+ return VReg;
+
+ // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
+ // register instead.
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ assert(RC && "No legal register class for VT supports that SubIdx");
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VReg);
+ return NewReg;
+}
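+
+// Note: getSubClassWithSubReg returns the largest legal sub-class of VRC whose
+// registers all support the SubIdx sub-register, and the MinRCSize argument
+// keeps constrainRegClass from shrinking the class so far that the allocator
+// is left with too few registers; only when that fails is the COPY above
+// emitted.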
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned VRBase = 0;
+ unsigned Opc = Node->getMachineOpcode();
+
+ // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+ // the CopyToReg'd destination register instead of creating a new vreg.
+ for (SDNode *User : Node->uses()) {
+ if (User->getOpcode() == ISD::CopyToReg &&
+ User->getOperand(2).getNode() == Node) {
+ unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ VRBase = DestReg;
+ break;
+ }
+ }
+ }
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no
+ // constraints on the %dst register; COPY can target all legal register
+ // classes.
+ unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *TRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0));
+
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx &&
+ TRC == MRI->getRegClass(SrcReg)) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ } else {
+ // VReg may not support a SubIdx sub-register, and we may need to
+ // constrain its register class or issue a COPY to a compatible register
+ // class.
+ VReg = ConstrainForSubReg(VReg, SubIdx,
+ Node->getOperand(0).getSimpleValueType(),
+ Node->getDebugLoc());
+
+ // Create the destreg if it is missing.
+ if (VRBase == 0)
+ VRBase = MRI->createVirtualRegister(TRC);
+
+ // Create the extract_subreg machine instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx);
+ }
+ } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+ SDValue N2 = Node->getOperand(2);
+ unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Figure out the register class to create for the destreg. It should be
+ // the largest legal register class supporting SubIdx sub-registers.
+ // RegisterCoalescer will constrain it further if it decides to eliminate
+ // the INSERT_SUBREG instruction.
+ //
+ // %dst = INSERT_SUBREG %src, %sub, SubIdx
+ //
+ // is lowered by TwoAddressInstructionPass to:
+ //
+ // %dst = COPY %src
+ // %dst:SubIdx = COPY %sub
+ //
+ // There is no constraint on the %src register class.
+ //
+ const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+ SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
+ assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
+
+ if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase)))
+ VRBase = MRI->createVirtualRegister(SRC);
+
+ // Create the insert_subreg or subreg_to_reg machine instruction.
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase);
+
+ // If creating a subreg_to_reg, then the first input operand
+ // is an implicit value immediate; otherwise it's a register.
+ if (Opc == TargetOpcode::SUBREG_TO_REG) {
+ const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+ MIB.addImm(SD->getZExtValue());
+ } else
+ AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ // Add the subregister being inserted.
+ AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ MIB.addImm(SubIdx);
+ MBB->insert(InsertPos, MIB);
+ } else
+ llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+ // Create the new VReg in the destination class and emit a copy.
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ const TargetRegisterClass *DstRC =
+ TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
+ unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
+
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
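+
+// Illustrative example: (COPY_TO_REGCLASS %src, <rc-id>) becomes
+//   %newvreg = COPY %src
+// where %newvreg is created in the register class named by the second
+// operand; the register coalescer may later remove the copy if %src already
+// satisfies that class.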
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
+ unsigned NumOps = Node->getNumOperands();
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ for (unsigned i = 1; i != NumOps; ++i) {
+ SDValue Op = Node->getOperand(i);
+ if ((i & 1) == 0) {
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
+ }
+ }
+ AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ }
+
+ MBB->insert(InsertPos, MIB);
+ SDValue Op(Node, 0);
+ bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+}
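+
+// Illustrative example: a node such as
+//   REG_SEQUENCE <rc-id>, %lo, <subidx0>, %hi, <subidx1>
+// (a register class id followed by value/sub-register-index pairs, hence the
+// odd operand count asserted above) is emitted as a single REG_SEQUENCE
+// machine instruction defining one new vreg, whose register class may be
+// adjusted as each pair is processed so the sub-register insertions stay
+// legal.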
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *
+InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ uint64_t Offset = SD->getOffset();
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+
+ if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ // Stack address; this needs to be lowered in target-dependent fashion.
+ // EmitTargetCodeForFrameDebugValue is responsible for allocation.
+ return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addFrameIndex(SD->getFrameIx())
+ .addImm(Offset)
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ }
+ // Otherwise, we're going to create an instruction here.
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ // It's possible we replaced this SDNode with other(s) and therefore
+ // didn't generate code for it. It's better to catch these cases where
+ // they happen and transfer the debug info, but trying to guarantee that
+ // in all cases would be very fragile; this is a safeguard for any
+ // that were missed.
+ DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ MIB.addReg(0U); // undef
+ else
+ AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ const Value *V = SD->getConst();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getSExtValue());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ MIB.addFPImm(CF);
+ } else {
+ // Could be an Undef. In any case insert an Undef so we can see what we
+ // dropped.
+ MIB.addReg(0U);
+ }
+ } else {
+ // Insert an Undef so we can see what we dropped.
+ MIB.addReg(0U);
+ }
+
+ // Indirect addressing is indicated by an Imm as the second parameter.
+ if (SD->isIndirect())
+ MIB.addImm(Offset);
+ else {
+ assert(Offset == 0 && "direct value cannot have an offset");
+ MIB.addReg(0U, RegState::Debug);
+ }
+
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+
+ return &*MIB;
+}
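+
+// Note: the emitted DBG_VALUE carries four operands -- the location (a frame
+// index, a virtual register, an immediate, or an undef register when the
+// value was optimized away), an offset immediate (or a dummy register operand
+// for direct values), and the variable and expression metadata.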
+
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
+///
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
+
+ // Handle REG_SEQUENCE specially.
+ if (Opc == TargetOpcode::REG_SEQUENCE) {
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
+ return;
+ }
+
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const MCInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NumDefs = II.getNumDefs();
+ const MCPhysReg *ScratchRegs = nullptr;
+
+ // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
+ if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+ // Stackmaps do not have arguments and do not preserve their calling
+ // convention. However, to simplify runtime support, they clobber the same
+ // scratch registers as AnyRegCC.
+ unsigned CC = CallingConv::AnyReg;
+ if (Opc == TargetOpcode::PATCHPOINT) {
+ CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+ NumDefs = NumResults;
+ }
+ ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ }
+
+ unsigned NumImpUses = 0;
+ unsigned NodeOperands =
+ countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
+ bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
+#ifndef NDEBUG
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() +
+ NumImpUses &&
+ "#operands for dag node doesn't match .td file!");
+#endif
+
+ // Create the new machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = NumDefs > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Add scratch registers as implicit def and early clobber
+ if (ScratchRegs)
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
+ RegState::EarlyClobber);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MIB);
+
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
+ // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = NumDefs; i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - NumDefs];
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ }
+ }
+
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+
+ // Run post-isel target hook to adjust this instruction if needed.
+ if (II.hasPostISelHook())
+ TLI->AdjustInstrPostInstrSelection(*MIB, Node);
+}
+
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ Node->dump();
+#endif
+ llvm_unreachable("This target-independent node should have been selected!");
+ case ISD::EntryToken:
+ llvm_unreachable("EntryToken should have been excluded from the schedule!");
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor: // fall thru
+ break;
+ case ISD::CopyToReg: {
+ unsigned SrcReg;
+ SDValue SrcVal = Node->getOperand(2);
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+ SrcReg = R->getReg();
+ else
+ SrcReg = getVR(SrcVal, VRBaseMap);
+
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+ break;
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
+ break;
+ }
+ case ISD::CopyFromReg: {
+ unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+ break;
+ }
+ case ISD::EH_LABEL: {
+ MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ break;
+ }
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ?
+ TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END;
+
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1));
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addFrameIndex(FI->getIndex());
+ break;
+ }
+
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ // Create the inline asm machine instruction.
+ MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::INLINEASM));
+
+ // Add the asm string as an external symbol operand.
+ SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
+ const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
+ MIB.addExternalSymbol(AsmStr);
+
+ // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
+ // bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
+ getZExtValue();
+ MIB.addImm(ExtraInfo);
+
+ // Remember the operand index of the group flags.
+ SmallVector<unsigned, 8> GroupIdx;
+
+ // Remember registers that are part of early-clobber defs.
+ SmallVector<unsigned, 8> ECRegs;
+
+ // Add all of the operand registers to the instruction.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ GroupIdx.push_back(MIB->getNumOperands());
+ MIB.addImm(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MIB.addReg(Reg, RegState::Define |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ }
+ break;
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
+ for (unsigned j = 0; j != NumVals; ++j, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
+ getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ ECRegs.push_back(Reg);
+ }
+ break;
+ case InlineAsm::Kind_RegUse: // Use of register.
+ case InlineAsm::Kind_Imm: // Immediate.
+ case InlineAsm::Kind_Mem: // Addressing mode.
+ // The addressing mode has been selected, just add all of the
+ // operands to the machine instruction.
+ for (unsigned j = 0; j != NumVals; ++j, ++i)
+ AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
+
+ // Manually set isTied bits.
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
+ unsigned DefGroup = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+ unsigned DefIdx = GroupIdx[DefGroup] + 1;
+ unsigned UseIdx = GroupIdx.back() + 1;
+ for (unsigned j = 0; j != NumVals; ++j)
+ MIB->tieOperands(DefIdx + j, UseIdx + j);
+ }
+ }
+ break;
+ }
+ }
+
+ // GCC inline assembly allows input operands to also be early-clobber
+ // output operands (so long as the operand is written only after it's
+ // used), but this does not match the semantics of our early-clobber flag.
+ // If an early-clobber operand register is also an input operand register,
+ // then remove the early-clobber flag.
+ for (unsigned Reg : ECRegs) {
+ if (MIB->readsRegister(Reg, TRI)) {
+ MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI);
+ assert(MO && "No def operand for clobbered register?");
+ MO->setIsEarlyClobber(false);
+ }
+ }
+
+ // Get the mdnode from the asm if it exists and add it to the instruction.
+ SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode);
+ const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
+ if (MD)
+ MIB.addMetadata(MD);
+
+ MBB->insert(InsertPos, MIB);
+ break;
+ }
+ }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos)
+ : MF(mbb->getParent()), MRI(&MF->getRegInfo()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()),
+ TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
+ InsertPos(insertpos) {}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 000000000000..8a8a1bbd18f7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,145 @@
+//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+
+class MachineInstrBuilder;
+class MCInstrDesc;
+class SDDbgValue;
+
+class LLVM_LIBRARY_VISIBILITY InstrEmitter {
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator InsertPos;
+
+ /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+ /// implicit physical register output.
+ void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+ bool IsClone, bool IsCloned,
+ unsigned SrcReg,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+ /// of the node is a CopyToReg, return its destination register. Return 0
+ /// otherwise.
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+ unsigned ResNo) const;
+
+ void CreateVirtualRegisters(SDNode *Node,
+ MachineInstrBuilder &MIB,
+ const MCInstrDesc &II,
+ bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// getVR - Return the virtual register corresponding to the specified result
+ /// of the specified node.
+ unsigned getVR(SDValue Op,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// AddRegisterOperand - Add the specified register as an operand to the
+ /// specified machine instr. Insert register copies if the register is
+ /// not in the required register class.
+ void AddRegisterOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// AddOperand - Add the specified operand to the specified machine instr. II
+ /// specifies the instruction information for the node, and IIOpNum is the
+ /// operand number (in the II) that we are adding. IIOpNum and II are used for
+ /// assertions only.
+ void AddOperand(MachineInstrBuilder &MIB,
+ SDValue Op,
+ unsigned IIOpNum,
+ const MCInstrDesc *II,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug, bool IsClone, bool IsCloned);
+
+ /// ConstrainForSubReg - Try to constrain VReg to a register class that
+ /// supports SubIdx sub-registers. Emit a copy if that isn't possible.
+ /// Return the virtual register to use.
+ unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
+ const DebugLoc &DL);
+
+ /// EmitSubregNode - Generate machine code for subreg nodes.
+ ///
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+
+ /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+ /// COPY_TO_REGCLASS is just a normal copy, except that the destination
+ /// register is constrained to be in a particular register class.
+ ///
+ void EmitCopyToRegClassNode(SDNode *Node,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+ ///
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
+public:
+ /// CountResults - The results of target nodes have register or immediate
+ /// operands first, then an optional chain, and optional glue operands
+ /// (which do not go into the machine instrs).
+ static unsigned CountResults(SDNode *Node);
+
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+
+ /// EmitNode - Generate machine code for a node and needed dependencies.
+ ///
+ void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
+
+ /// getBlock - Return the current basic block.
+ MachineBasicBlock *getBlock() { return MBB; }
+
+ /// getInsertPos - Return the current insertion position.
+ MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+ /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+ /// at the given position in the given block.
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 000000000000..81634096c1ba
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,4422 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalizedag"
+
+namespace {
+
+struct FloatSignAsInt;
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it. This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of idioms
+/// as part of its processing. For example, if a target does not support a
+/// 'setcc' instruction efficiently, but does support the 'brcc' instruction,
+/// this will attempt to merge the setcc and branch instructions into brcc's.
+///
+class SelectionDAGLegalize {
+ const TargetMachine &TM;
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+
+ /// \brief The set of nodes which have already been legalized. We hold a
+ /// reference to it in order to update as necessary on node deletion.
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes;
+
+ /// \brief A set of all the nodes updated during legalization.
+ SmallSetVector<SDNode *, 16> *UpdatedNodes;
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ // Libcall insertion helpers.
+
+public:
+ SelectionDAGLegalize(SelectionDAG &DAG,
+ SmallPtrSetImpl<SDNode *> &LegalizedNodes,
+ SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr)
+ : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG),
+ LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {}
+
+ /// \brief Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
+
+private:
+ SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+ void LegalizeLoadOps(SDNode *Node);
+ void LegalizeStoreOps(SDNode *Node);
+
+ /// Some targets cannot handle a variable
+ /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it
+ /// is necessary to spill the vector being inserted into to memory, perform
+ /// the insert there, and then read the result back.
+ SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+ SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx,
+ const SDLoc &dl);
+
+ /// Return a vector shuffle operation which
+ /// performs the same shuffle in terms of order of result bytes, but on a type
+ /// whose vector element type is narrower than the original shuffle type.
+ /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+ SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl,
+ SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const;
+
+ bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+ bool &NeedInvert, const SDLoc &dl);
+
+ SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+ unsigned NumOps, bool isSigned, const SDLoc &dl);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128);
+ void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl);
+ SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+ void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results);
+ void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL,
+ SDValue Value) const;
+ SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL,
+ SDValue NewIntValue) const;
+ SDValue ExpandFCOPYSIGN(SDNode *Node) const;
+ SDValue ExpandFABS(SDNode *Node) const;
+ SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+ const SDLoc &dl);
+ SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+ const SDLoc &dl);
+ SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+ const SDLoc &dl);
+
+ SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
+ SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
+ SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl);
+
+ SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+ SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+ SDValue ExpandConstant(ConstantSDNode *CP);
+
+ // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall
+ bool ExpandNode(SDNode *Node);
+ void ConvertNodeToLibcall(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+public:
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(N);
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ assert(Old->getNumValues() == New->getNumValues() &&
+ "Replacing one node with another that produces a different number "
+ "of values!");
+ DAG.ReplaceAllUsesWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
+
+ DAG.ReplaceAllUsesWith(Old, New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) {
+ DEBUG(dbgs() << (i == 0 ? " with: "
+ : " and: ");
+ New[i]->dump(&DAG));
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New[i].getNode());
+ }
+ ReplacedNode(Old);
+ }
+};
+}
+
+/// Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
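+/// Undef lanes (-1) are simply widened to undef lanes; e.g. a v2i32 mask
+/// <1, -1> widened to v4i16 becomes <2, 3, -1, -1>.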
+SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(
+ EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2,
+ ArrayRef<int> Mask) const {
+ unsigned NumMaskElts = VT.getVectorNumElements();
+ unsigned NumDestElts = NVT.getVectorNumElements();
+ unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
+
+ assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!");
+
+ if (NumEltsGrowth == 1)
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask);
+
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int Idx = Mask[i];
+ for (unsigned j = 0; j != NumEltsGrowth; ++j) {
+ if (Idx < 0)
+ NewMask.push_back(-1);
+ else
+ NewMask.push_back(Idx * NumEltsGrowth + j);
+ }
+ }
+ assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?");
+ assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?");
+ return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask);
+}
+
+/// Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
+ bool Extend = false;
+ SDLoc dl(CFP);
+
+ // If a FP immediate is precise when represented as a float and if the
+ // target can do an extending load from float to double, we put it into
+ // the constant pool as a float, even if it is statically typed as a
+ // double. This shrinks FP constants and canonicalizes them for targets where
+ // an FP extending load is the same cost as a normal load (such as on the x87
+ // fp stack or PPC FP unit).
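+ // For example, a double constant of 0.5 is exactly representable as a
+ // float, so it can be emitted as a 4-byte f32 constant-pool entry and
+ // materialized with an fp-extending load to f64.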
+ EVT VT = CFP->getValueType(0);
+ ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
+ if (!UseCP) {
+ assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
+ return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl,
+ (VT == MVT::f64) ? MVT::i64 : MVT::i32);
+ }
+
+ EVT OrigVT = VT;
+ EVT SVT = VT;
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
+ }
+
+ SDValue CPIdx =
+ DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ if (Extend) {
+ SDValue Result = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT,
+ Alignment);
+ return Result;
+ }
+ SDValue Result = DAG.getLoad(
+ OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Expands the Constant node to a load from the constant pool.
+SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) {
+ SDLoc dl(CP);
+ EVT VT = CP->getValueType(0);
+ SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(),
+ TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue Result = DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ return Result;
+}
+
+/// Some targets cannot handle a variable insertion index for the
+/// INSERT_VECTOR_ELT instruction. In this case, it
+/// is necessary to spill the vector being inserted into to memory, perform
+/// the insert there, and then read the result back.
+SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec,
+ SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ SDValue Tmp1 = Vec;
+ SDValue Tmp2 = Val;
+ SDValue Tmp3 = Idx;
+
+ // If the target doesn't support this, we have to spill the input vector
+ // to a temporary stack slot, update the element, then reload it. This is
+ // badness. We could instead load the value into a vector register (either
+ // with a "move to register" or "extload into register" instruction), then
+ // permute it into place, if the idx is a constant and the target supports
+ // doing so.
+ EVT VT = Tmp1.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = Tmp3.getValueType();
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue StackPtr = DAG.CreateStackTemporary(VT);
+
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+
+ // Store the vector.
+ SDValue Ch = DAG.getStore(
+ DAG.getEntryNode(), dl, Tmp1, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+
+ // Truncate or zero extend offset to target pointer type.
+ Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT);
+ // Add the offset to the index.
+ unsigned EltSize = EltVT.getSizeInBits()/8;
+ Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,
+ DAG.getConstant(EltSize, dl, IdxVT));
+ SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
+ // Store the scalar value.
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT);
+ // Load the updated vector.
+ return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), SPFI));
+}
+
+SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+ SDValue Idx,
+ const SDLoc &dl) {
+ if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
+ // SCALAR_TO_VECTOR requires that the type of the value being inserted
+ // match the element type of the vector being created, except for
+ // integers, in which case the inserted value may be wider than the element type.
+ EVT EltVT = Vec.getValueType().getVectorElementType();
+ if (Val.getValueType() == EltVT ||
+ (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) {
+ SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ Vec.getValueType(), Val);
+
+ unsigned NumElts = Vec.getValueType().getVectorNumElements();
+ // We generate a shuffle of InVec and ScVec, so the shuffle mask
+ // should be 0,1,2,3,4,5... with the appropriate element replaced with
+ // elt 0 of the RHS.
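+ // For example, inserting into element 2 of a 4-element vector yields the
+ // mask <0, 1, 4, 3>, where index 4 selects element 0 of ScVec.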
+ SmallVector<int, 8> ShufOps;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts);
+
+ return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps);
+ }
+ }
+ return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl);
+}
+
+SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
+ // FIXME: We shouldn't do this for TargetConstantFP's.
+ // FIXME: move this to the DAG Combiner! Note that we can't regress due
+ // to phase ordering between legalized code and the dag combiner. This
+ // probably means that we need to integrate dag combiner and legalizer
+ // together.
+ // We generally can't do this one for long doubles.
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc dl(ST);
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+ if (CFP->getValueType(0) == MVT::f32 &&
+ TLI.isTypeLegal(MVT::i32)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().
+ bitcastToAPInt().zextOrTrunc(32),
+ SDLoc(CFP), MVT::i32);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment,
+ MMOFlags, AAInfo);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
+ // If this target supports 64-bit registers, do a single 64-bit store.
+ if (TLI.isTypeLegal(MVT::i64)) {
+ SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+ zextOrTrunc(64), SDLoc(CFP), MVT::i64);
+ return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ }
+
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
+ // Otherwise, if the target supports 32-bit registers, use 2 32-bit
+ // stores. If the target supports neither 32- nor 64-bits, this
+ // xform is certainly not worth it.
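+ // For example, on a little-endian target a store of double 1.0
+ // (bit pattern 0x3FF0000000000000) becomes an i32 store of 0x00000000 at
+ // offset 0 and an i32 store of 0x3FF00000 at offset 4.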
+ const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt();
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32);
+ SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
+ MMOFlags, AAInfo);
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, dl, Ptr.getValueType()));
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(4),
+ MinAlign(Alignment, 4U), MMOFlags, AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+ }
+ }
+ return SDValue(nullptr, 0);
+}
+
+void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDLoc dl(Node);
+
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ if (!ST->isTruncatingStore()) {
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ {
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ EVT MemVT = ST->getMemoryVT();
+ unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+ } else {
+ SDValue Value = ST->getValue();
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ auto &DL = DAG.getDataLayout();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (DL.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(
+ Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ } else {
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getTruncStore(
+ Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
+ unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ assert(TLI.isTypeLegal(StVT) &&
+ "Do not know how to expand this store!");
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ }
+}
+
+void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
+ SDValue Chain = LD->getChain(); // The chain.
+ SDValue Ptr = LD->getBasePtr(); // The base pointer.
+ SDValue Value; // The value returned by the load op.
+ SDLoc dl(Node);
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD) {
+ MVT VT = Node->getSimpleValueType(0);
+ SDValue RVal = SDValue(Node, 0);
+ SDValue RChain = SDValue(Node, 1);
+
+ switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = LD->getMemoryVT();
+ unsigned AS = LD->getAddressSpace();
+ unsigned Align = LD->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
+ RVal = Res;
+ RChain = Res.getValue(1);
+ }
+ break;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand());
+ RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+ RChain = Res.getValue(1);
+ break;
+ }
+ }
+ if (RChain.getNode() != Node) {
+ assert(RVal.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(RVal.getNode());
+ UpdatedNodes->insert(RChain.getNode());
+ }
+ ReplacedNode(Node);
+ }
+ return;
+ }
+
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) ==
+ TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Value = Result;
+ Chain = Ch;
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+ auto &DL = DAG.getDataLayout();
+
+ if (DL.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
+ AAInfo);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
+ AAInfo);
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo(), RoundVT, Alignment, MMOFlags,
+ AAInfo);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
+ AAInfo);
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(
+ ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Hi.getValueType(), DL)));
+
+ // Join the hi and lo parts.
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
+
+ Chain = Ch;
+ } else {
+ bool isCustom = false;
+ switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0),
+ SrcVT.getSimpleVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal: {
+ Value = SDValue(Node, 0);
+ Chain = SDValue(Node, 1);
+
+ if (isCustom) {
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ Value = Res;
+ Chain = Res.getValue(1);
+ }
+ } else {
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ EVT MemVT = LD->getMemoryVT();
+ unsigned AS = LD->getAddressSpace();
+ unsigned Align = LD->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
+ }
+ }
+ break;
+ }
+ case TargetLowering::Expand:
+ EVT DestVT = Node->getValueType(0);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
+ // If the source type is not legal, see if there is a legal extload to
+ // an intermediate type that we can then extend further.
+ EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
+ if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ // If we are loading a legal type, this is a non-extload followed by a
+ // full extend.
+ ISD::LoadExtType MidExtType =
+ (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
+
+ SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
+ SrcVT, LD->getMemOperand());
+ unsigned ExtendOp =
+ ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
+ break;
+ }
+
+ // Handle the special case of fp16 extloads. EXTLOAD doesn't have the
+ // normal undefined upper bits behavior to allow using an in-reg extend
+ // with the illegal FP type, so load as an integer and do the
+ // from-integer conversion.
+ if (SrcVT.getScalarType() == MVT::f16) {
+ EVT ISrcVT = SrcVT.changeTypeToInteger();
+ EVT IDestVT = DestVT.changeTypeToInteger();
+ EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
+ Chain, Ptr, ISrcVT,
+ LD->getMemOperand());
+ Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
+
+ // FIXME: This does not work for vectors on most targets. Sign-
+ // and zero-extend operations are currently folded into extending
+ // loads, whether they are legal or not, and then we end up here
+ // without any support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an
+ // explicit zero/sign extend inreg.
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0),
+ Chain, Ptr, SrcVT,
+ LD->getMemOperand());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Value = ValRes;
+ Chain = Result.getValue(1);
+ break;
+ }
+ }
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ if (Chain.getNode() != Node) {
+ assert(Value.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Value.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
+ ReplacedNode(Node);
+ }
+}
+
+/// Return a legal replacement for the given operation, with all legal operands.
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
+
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Node->getValueType(i))) &&
+ "Unexpected illegal type!");
+
+ for (const SDValue &Op : Node->op_values())
+ assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
+ TargetLowering::TypeLegal ||
+ TLI.isTypeLegal(Op.getValueType()) ||
+ Op.getOpcode() == ISD::TargetConstant) &&
+ "Unexpected illegal type!");
+#endif
+
+ // Figure out the correct action; the way to query this varies by opcode
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ bool SimpleFinishLegalizing = true;
+ switch (Node->getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::STACKSAVE:
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
+ case ISD::FP_TO_FP16:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::EXTRACT_VECTOR_ELT:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(0).getValueType());
+ break;
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(2).getValueType());
+ break;
+ }
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::BR_CC: {
+ unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::SETCC ? 2 :
+ Node->getOpcode() == ISD::SETCCE ? 3 : 1;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal) {
+ if (Node->getOpcode() == ISD::SELECT_CC)
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ else
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ }
+ break;
+ }
+ case ISD::LOAD:
+ case ISD::STORE:
+ // FIXME: Model these properly. LOAD and STORE are complicated, and
+ // STORE expects the unlegalized operand in some cases.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ // FIXME: This shouldn't be necessary. These nodes have special properties
+ // dealing with the recursive nature of legalization. Removing this
+ // special case should be done as part of making LegalizeDAG non-recursive.
+ SimpleFinishLegalizing = false;
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ case ISD::FLT_ROUNDS_:
+ case ISD::FPOWI:
+ case ISD::MERGE_VALUES:
+ case ISD::EH_RETURN:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_SETUP_DISPATCH:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be expanded.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+ case ISD::INIT_TRAMPOLINE:
+ case ISD::ADJUST_TRAMPOLINE:
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ // These operations lie about being legal: when they claim to be legal,
+ // they should actually be custom-lowered.
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Custom;
+ break;
+ case ISD::READCYCLECOUNTER:
+ // READCYCLECOUNTER returns an i64, even if type legalization might have
+ // expanded that to several smaller types.
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
+ break;
+ case ISD::READ_REGISTER:
+ case ISD::WRITE_REGISTER:
+ // Named registers are legal in the DAG, but are blocked by register-name
+ // selection if not implemented by the target (to choose the correct
+ // register). They'll be converted to Copy(To/From)Reg.
+ Action = TargetLowering::Legal;
+ break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::DEBUGTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+
+ default:
+ if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
+ Action = TargetLowering::Legal;
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ }
+ break;
+ }
+
+ if (SimpleFinishLegalizing) {
+ SDNode *NewNode = Node;
+ switch (Node->getOpcode()) {
+ default: break;
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
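+ // For example, an i64 shift whose amount was built as an i64 value may
+ // need that amount zero-extended or truncated to the target's preferred
+ // shift-amount type before the shift itself is legalized.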
+ if (!Node->getOperand(1).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(1));
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Handle.getValue());
+ }
+ break;
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ case ISD::SHL_PARTS:
+ // Legalizing shifts/rotates requires adjusting the shift amount
+ // to the appropriate width.
+ if (!Node->getOperand(2).getValueType().isVector()) {
+ SDValue SAO =
+ DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
+ Node->getOperand(2));
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
+ Node->getOperand(1),
+ Handle.getValue());
+ }
+ break;
+ }
+
+ if (NewNode != Node) {
+ ReplaceNode(Node, NewNode);
+ Node = NewNode;
+ }
+ switch (Action) {
+ case TargetLowering::Legal:
+ return;
+ case TargetLowering::Custom: {
+ // FIXME: The handling for custom lowering with multiple results is
+ // a complete mess.
+ if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ if (!(Res.getNode() != Node || Res.getResNo() != 0))
+ return;
+
+ if (Node->getNumValues() == 1) {
+ // We can just directly replace this node with the lowered value.
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+
+ SmallVector<SDValue, 8> ResultVals;
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ ResultVals.push_back(Res.getValue(i));
+ ReplaceNode(Node, ResultVals.data());
+ return;
+ }
+ }
+ // FALL THROUGH
+ case TargetLowering::Expand:
+ if (ExpandNode(Node))
+ return;
+ // FALL THROUGH
+ case TargetLowering::LibCall:
+ ConvertNodeToLibcall(Node);
+ return;
+ case TargetLowering::Promote:
+ PromoteNode(Node);
+ return;
+ }
+ }
+
+ switch (Node->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "NODE: ";
+ Node->dump( &DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to legalize this operator!");
+
+ case ISD::CALLSEQ_START:
+ case ISD::CALLSEQ_END:
+ break;
+ case ISD::LOAD: {
+ return LegalizeLoadOps(Node);
+ }
+ case ISD::STORE: {
+ return LegalizeStoreOps(Node);
+ }
+ }
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Before we generate a new store to a temporary stack slot, see if there is
+ // already one that we can use. There often is because when we scalarize
+ // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole
+ // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
+ // the vector. If all are expanded here, we don't want one store per vector
+ // element.
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(Idx.getNode());
+ SDValue StackPtr, Ch;
+ for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
+ UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
+ if (ST->isIndexed() || ST->isTruncatingStore() ||
+ ST->getValue() != Vec)
+ continue;
+
+ // Make sure that nothing else could have stored into the destination of
+ // this store.
+ if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
+ continue;
+
+ // If the index is dependent on the store we will introduce a cycle when
+ // creating the load (the load uses the index, and by replacing the chain
+ // we will make the index dependent on the load).
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist))
+ continue;
+
+ StackPtr = ST->getBasePtr();
+ Ch = SDValue(ST, 0);
+ break;
+ }
+ }
+
+ if (!Ch.getNode()) {
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+ StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo());
+ }
+
+ // Add the offset to the index.
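+ // For example, extracting element 3 of a v4i32 scales the index by the
+ // 4-byte element size, giving a byte offset of 12 into the stack slot.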
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
+
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
+ StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+ SDValue NewLoad;
+
+ if (Op.getValueType().isVector())
+ NewLoad =
+ DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo());
+ else
+ NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType());
+
+ // Replace the chain going out of the store with the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+ // We introduced a cycle though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+ NewLoad->op_end());
+ NewLoadOperands[0] = Ch;
+ NewLoad =
+ SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+ return NewLoad;
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ SDLoc dl(Op);
+
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
+ Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+ Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo());
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+ // We can't handle this case efficiently. Allocate a sufficiently
+ // aligned object on the stack, store each element into it, then load
+ // the result as a vector.
+ // Create the stack frame object.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(Node);
+ SDValue FIPtr = DAG.CreateStackTemporary(VT);
+ int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+
+ // Emit a store of each element to the stack slot.
+ SmallVector<SDValue, 8> Stores;
+ unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ // Store (in the right endianness) the elements to memory.
+ for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Node->getOperand(i).isUndef()) continue;
+
+ unsigned Offset = TypeByteSize*i;
+
+ SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset), EltVT));
+ } else
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i),
+ Idx, PtrInfo.getWithOffset(Offset)));
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty()) // Not all undef elements?
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Result is a load from the stack slot.
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
+}
+
+namespace {
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+ uint8_t SignBit;
+};
+}
+
+/// Bitcast a floating-point value to an integer value. Only bitcast the part
+/// containing the sign bit if the target has no integer value capable of
+/// holding all bits of the floating-point value.
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
+ const SDLoc &DL,
+ SDValue Value) const {
+ EVT FloatVT = Value.getValueType();
+ unsigned NumBits = FloatVT.getSizeInBits();
+ State.FloatVT = FloatVT;
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+ // Convert to an integer of the same size.
+ if (TLI.isTypeLegal(IVT)) {
+ State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value);
+ State.SignMask = APInt::getSignBit(NumBits);
+ State.SignBit = NumBits - 1;
+ return;
+ }
+
+ auto &DataLayout = DAG.getDataLayout();
+ // Store the float to memory, then load the sign part out as an integer.
+ MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ // First create a temporary that is aligned for both the load and store.
+ SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ // Then store the float to it.
+ State.FloatPtr = StackPtr;
+ MachineFunction &MF = DAG.getMachineFunction();
+ State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr,
+ State.FloatPointerInfo);
+
+ SDValue IntPtr;
+ if (DataLayout.isBigEndian()) {
+ assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+ // Load out a legal integer with the same sign bit as the float.
+ IntPtr = StackPtr;
+ State.IntPointerInfo = State.FloatPointerInfo;
+ } else {
+ // Advance the pointer so that the loaded byte will contain the sign bit.
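+ // For example, for a little-endian f64 the sign bit lives in byte 7 of
+ // the stored value, so ByteOffset is 7.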
+ unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
+ IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
+ ByteOffset);
+ }
+
+ State.IntPtr = IntPtr;
+ State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr,
+ State.IntPointerInfo, MVT::i8);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+ State.SignBit = 7;
+}
+
+/// Replace the integer value produced by getSignAsIntValue() with a new value
+/// and cast the result back to a floating-point type.
+SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State,
+ const SDLoc &DL,
+ SDValue NewIntValue) const {
+ if (!State.Chain)
+ return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue);
+
+ // Override the part containing the sign bit in the value stored on the stack.
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr,
+ State.IntPointerInfo, MVT::i8);
+ return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr,
+ State.FloatPointerInfo);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Mag = Node->getOperand(0);
+ SDValue Sign = Node->getOperand(1);
+
+ // Get sign bit into an integer value.
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Sign);
+
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue,
+ SignMask);
+
+ // If FABS is legal, transform FCOPYSIGN(x, y) => sign(y) ? -FABS(x) : FABS(x)
+ EVT FloatVT = Mag.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) {
+ SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag);
+ SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue);
+ SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit,
+ DAG.getConstant(0, DL, IntVT), ISD::SETNE);
+ return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue);
+ }
+
+ // Transform Mag value to integer, and clear the sign bit.
+ FloatSignAsInt MagAsInt;
+ getSignAsIntValue(MagAsInt, DL, Mag);
+ EVT MagVT = MagAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue,
+ ClearSignMask);
+
+ // Get the signbit at the right position for MagAsInt.
+ int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
+ if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+ if (ShiftAmount > 0) {
+ SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, IntVT);
+ SignBit = DAG.getNode(ISD::SRL, DL, IntVT, SignBit, ShiftCnst);
+ } else if (ShiftAmount < 0) {
+ SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, IntVT);
+ SignBit = DAG.getNode(ISD::SHL, DL, IntVT, SignBit, ShiftCnst);
+ }
+ SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
+ } else if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+ SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
+ if (ShiftAmount > 0) {
+ SDValue ShiftCnst = DAG.getConstant(ShiftAmount, DL, MagVT);
+ SignBit = DAG.getNode(ISD::SRL, DL, MagVT, SignBit, ShiftCnst);
+ } else if (ShiftAmount < 0) {
+ SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, MagVT);
+ SignBit = DAG.getNode(ISD::SHL, DL, MagVT, SignBit, ShiftCnst);
+ }
+ }
+
+ // Store the part with the modified sign and convert back to float.
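+ // For example, when both operands are f32 and i32 is legal, this computes
+ // (bitcast(x) & 0x7FFFFFFF) | (bitcast(y) & 0x80000000).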
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit);
+ return modifySignAsInt(MagAsInt, DL, CopiedSign);
+}
+
+SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
+ SDLoc DL(Node);
+ SDValue Value = Node->getOperand(0);
+
+ // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal.
+ EVT FloatVT = Value.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT);
+ return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero);
+ }
+
+ // Transform value to integer, clear the sign bit and transform back.
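+ // For example, for an f32 input with i32 legal this computes
+ // bitcast(bitcast(x) & 0x7FFFFFFF).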
+ FloatSignAsInt ValueAsInt;
+ getSignAsIntValue(ValueAsInt, DL, Value);
+ EVT IntVT = ValueAsInt.IntValue.getValueType();
+ SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue,
+ ClearSignMask);
+ return modifySignAsInt(ValueAsInt, DL, ClearedSign);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ if (Align > StackAlign)
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
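+ // For example, with a requested alignment of 32 and a 16-byte stack
+ // alignment, the new stack pointer is rounded down via (SP - Size) & -32.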
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp2);
+}
+
+/// Legalize a SETCC with given LHS and RHS and condition code CC on the current
+/// target.
+///
+/// If the SETCC has been legalized using AND / OR, then the legalized node
+/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+/// will be set to false.
+///
+/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+/// then the values of LHS and RHS will be swapped, CC will be set to the
+/// new condition, and NeedInvert will be set to false.
+///
+/// If the SETCC has been legalized using the inverse condcode, then LHS and
+/// RHS will be unchanged, CC will be set to the inverted condcode, and NeedInvert
+/// will be set to true. The caller must invert the result of the SETCC with
+/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
+/// of a true/false result.
+///
+/// \returns true if the SetCC has been legalized, false if it hasn't.
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
+ SDValue &RHS, SDValue &CC,
+ bool &NeedInvert,
+ const SDLoc &dl) {
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+ NeedInvert = false;
+ switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+ default: llvm_unreachable("Unknown condition code action!");
+ case TargetLowering::Legal:
+ // Nothing to do.
+ break;
+ case TargetLowering::Expand: {
+ ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(InvCC);
+ return true;
+ }
+ ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+ unsigned Opc = 0;
+ switch (CCCode) {
+ default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETO:
+ assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
+ == TargetLowering::Legal
+ && "If SETO is expanded, SETOEQ must be legal!");
+ CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
+ case ISD::SETUO:
+ assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
+ == TargetLowering::Legal
+ && "If SETUO is expanded, SETUNE must be legal!");
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If we are floating point, assign and break, otherwise fall through.
+ if (!OpVT.isInteger()) {
+ // We can use the 4th bit to tell if we are the unordered
+ // or ordered version of the opcode.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
+ CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
+ break;
+ }
+ // Fallthrough if we are an unsigned integer.
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETLT:
+ // We only support using the inverted operation (computed above); there is
+ // no other way to expand these cases.
+ llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETNE:
+ case ISD::SETEQ:
+ // Try inverting the result of the inverse condition.
+ InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
+ if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+ CC = DAG.getCondCode(InvCC);
+ NeedInvert = true;
+ return true;
+ }
+ // If inverting the condition didn't work then we have no means to expand
+ // the condition.
+ llvm_unreachable("Don't know how to expand this condition!");
+ }
+
+ SDValue SetCC1, SetCC2;
+ if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
+ // If we aren't the ordered or unordered operation,
+ // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ } else {
+ // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ }
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ RHS = SDValue();
+ CC = SDValue();
+ return true;
+ }
+ }
+ return false;
+}
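
A hedged sketch of how a caller is expected to honor the contract documented above, in particular the NeedInvert flag (a fragment for illustration, not code from this patch):

    bool NeedInvert;
    if (LegalizeSetCCCondCode(VT, LHS, RHS, CC, NeedInvert, dl)) {
      // If CC is still set, the condition was merely swapped or inverted and a
      // real SETCC must still be built; otherwise LHS already holds the
      // AND/OR-expanded value.
      SDValue Res = CC.getNode()
                        ? DAG.getSetCC(dl, VT, LHS, RHS,
                                       cast<CondCodeSDNode>(CC)->get())
                        : LHS;
      if (NeedInvert)
        Res = DAG.getLogicalNOT(dl, Res, Res.getValueType());
    }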
+
+/// Emit a store/load combination to the stack. This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl) {
+ // Create the stack frame object.
+ unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
+ SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+ int SPFI = StackPtrFI->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType);
+
+ // Emit a store to the stack slot. Use a truncstore if the input value is
+ // wider than the slot type (SlotVT).
+ SDValue Store;
+
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
+ SlotVT, SrcAlign);
+ else {
+ assert(SrcSize == SlotSize && "Invalid store");
+ Store =
+ DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ }
+
+ // Result is a load from the stack slot.
+ if (SlotSize == DestSize)
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
+
+ assert(SlotSize < DestSize && "Unknown extension!");
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
+ DestAlign);
+}
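
When the three sizes match, the store/load pair above is simply a bit-preserving conversion through memory; the scalar analogue is the familiar memcpy bitcast (a sketch for the same-size case only; the DAG version also handles truncating stores and extending loads):

    #include <cstdint>
    #include <cstring>

    static double bitcastI64ToF64(uint64_t Src) {
      unsigned char Slot[sizeof(double)];      // the stack temporary
      std::memcpy(Slot, &Src, sizeof(Slot));   // store to the slot
      double Dest;
      std::memcpy(&Dest, Slot, sizeof(Dest));  // load from the slot
      return Dest;
    }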
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+ SDLoc dl(Node);
+ // Create a vector sized/aligned stack slot, store the value to element #0,
+ // then load the whole vector back out.
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+ FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+ int SPFI = StackPtrFI->getIndex();
+
+ SDValue Ch = DAG.getTruncStore(
+ DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI),
+ Node->getValueType(0).getVectorElementType());
+ return DAG.getLoad(
+ Node->getValueType(0), dl, Ch, StackPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI));
+}
+
+static bool
+ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue &Res) {
+ unsigned NumElems = Node->getNumOperands();
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+
+ // Try to group the scalars into pairs, shuffle the pairs together, then
+ // shuffle the pairs of pairs together, etc. until the vector has
+ // been built. This will work only if all of the necessary shuffle masks
+ // are legal.
+
+ // We do this in two phases; first to check the legality of the shuffles,
+ // and next, assuming that all shuffles are legal, to create the new nodes.
+ for (int Phase = 0; Phase < 2; ++Phase) {
+ SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals,
+ NewIntermedVals;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+
+ SDValue Vec;
+ if (Phase)
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V);
+ IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i)));
+ }
+
+ while (IntermedVals.size() > 2) {
+ NewIntermedVals.clear();
+ for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) {
+ // This vector and the next vector are shuffled together (simply to
+ // append the one to the other).
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+
+ SmallVector<int, 16> FinalIndices;
+ FinalIndices.reserve(IntermedVals[i].second.size() +
+ IntermedVals[i+1].second.size());
+
+ int k = 0;
+ for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = j;
+ FinalIndices.push_back(IntermedVals[i].second[j]);
+ }
+ for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f;
+ ++j, ++k) {
+ ShuffleVec[k] = NumElems + j;
+ FinalIndices.push_back(IntermedVals[i+1].second[j]);
+ }
+
+ SDValue Shuffle;
+ if (Phase)
+ Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first,
+ IntermedVals[i+1].first,
+ ShuffleVec);
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ NewIntermedVals.push_back(
+ std::make_pair(Shuffle, std::move(FinalIndices)));
+ }
+
+ // If we had an odd number of defined values, then append the last
+ // element to the array of new vectors.
+ if ((IntermedVals.size() & 1) != 0)
+ NewIntermedVals.push_back(IntermedVals.back());
+
+ IntermedVals.swap(NewIntermedVals);
+ }
+
+ assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 &&
+ "Invalid number of intermediate vectors");
+ SDValue Vec1 = IntermedVals[0].first;
+ SDValue Vec2;
+ if (IntermedVals.size() > 1)
+ Vec2 = IntermedVals[1].first;
+ else if (Phase)
+ Vec2 = DAG.getUNDEF(VT);
+
+ SmallVector<int, 16> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[0].second[i]] = i;
+ for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i)
+ ShuffleVec[IntermedVals[1].second[i]] = NumElems + i;
+
+ if (Phase)
+ Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
+ else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+ return false;
+ }
+
+ return true;
+}
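
Worked by hand for a four-element build of <a, b, c, d> (an example, not taken from the patch): phase 0 only queries isShuffleMaskLegal, phase 1 then wraps each scalar in a SCALAR_TO_VECTOR, shuffles the pairs with mask <0, 4, -1, -1> to produce <a, b, u, u> and <c, d, u, u>, and finally combines those two vectors with mask <0, 1, 4, 5> to obtain <a, b, c, d>.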
+
+/// Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+ unsigned NumElems = Node->getNumOperands();
+ SDValue Value1, Value2;
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT OpVT = Node->getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ // If the only non-undef value is the low element, turn this into a
+ // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X.
+ bool isOnlyLowElement = true;
+ bool MoreThanTwoValues = false;
+ bool isConstant = true;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value1.getNode()) {
+ Value1 = V;
+ } else if (!Value2.getNode()) {
+ if (V != Value1)
+ Value2 = V;
+ } else if (V != Value1 && V != Value2) {
+ MoreThanTwoValues = true;
+ }
+ }
+
+ if (!Value1.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+ // If all elements are constants, create a load from the constant pool.
+ if (isConstant) {
+ SmallVector<Constant*, 16> CV;
+ for (unsigned i = 0, e = NumElems; i != e; ++i) {
+ if (ConstantFPSDNode *V =
+ dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+ CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+ } else if (ConstantSDNode *V =
+ dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
+ } else {
+ assert(Node->getOperand(i).isUndef());
+ Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+ CV.push_back(UndefValue::get(OpNTy));
+ }
+ }
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx =
+ DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ return DAG.getLoad(
+ VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ }
+
+ SmallSet<SDValue, 16> DefinedValues;
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Node->getOperand(i).isUndef())
+ continue;
+ DefinedValues.insert(Node->getOperand(i));
+ }
+
+ if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) {
+ if (!MoreThanTwoValues) {
+ SmallVector<int, 8> ShuffleVec(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue V = Node->getOperand(i);
+ if (V.isUndef())
+ continue;
+ ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+ }
+ if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+ // Get the splatted value into the low element of a vector register.
+ SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+ SDValue Vec2;
+ if (Value2.getNode())
+ Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+ else
+ Vec2 = DAG.getUNDEF(VT);
+
+ // Return shuffle(LowValVec, undef, <0,0,0,0>)
+ return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec);
+ }
+ } else {
+ SDValue Res;
+ if (ExpandBVWithShuffles(Node, DAG, TLI, Res))
+ return Res;
+ }
+ }
+
+ // Otherwise, we can't handle this case efficiently.
+ return ExpandVectorBuildThroughStack(Node);
+}
+
+// Expand a node into a call to a libcall. If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument. If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call then
+ // TLI.isUsedByReturnOnly will change it to the right chain if the return
+ // node which is being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
+ // isTailCall may be true since the callee does not reference the caller's
+ // stack frame. Check if it's in the right position and that the return types
+ // match.
+ SDValue TCChain = InChain;
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Node, TCChain) &&
+ (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy());
+ if (isTailCall)
+ InChain = TCChain;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
+ return CallInfo.first;
+}
+
+/// Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+ const SDValue *Ops, unsigned NumOps,
+ bool isSigned, const SDLoc &dl) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumOps);
+
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ Entry.Node = Ops[i];
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo.first;
+}
+
+// Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+ return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
+ case MVT::i128: LC = Call_I128; break;
+ }
+ return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// Issue libcalls to __{u}divmod to compute div / rem pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ unsigned Opcode = Node->getOpcode();
+ bool isSigned = Opcode == ISD::SDIVREM;
+
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
+ case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+ case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+ case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+ case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the necessary dependence on
+ // the previous call.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the return address of the remainder.
+ SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = FIPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDLoc dl(Node);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ // Remainder is loaded back from the stack frame.
+ SDValue Rem =
+ DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo());
+ Results.push_back(CallInfo.first);
+ Results.push_back(Rem);
+}
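
At the source level the call shape built above matches compiler-rt's combined divmod helpers: the quotient comes back in the return value and the remainder is written through the extra pointer argument (the stack temporary FIPtr). A sketch, assuming the target maps RTLIB::SDIVREM_I64 to __divmoddi4:

    #include <cstdint>

    extern "C" int64_t __divmoddi4(int64_t A, int64_t B, int64_t *Rem);

    static void divrem64(int64_t A, int64_t B, int64_t &Quot, int64_t &Rem) {
      int64_t R;                    // plays the role of the stack slot
      Quot = __divmoddi4(A, B, &R);
      Rem = R;                      // "remainder is loaded back from the stack"
    }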
+
+/// Return true if sincos libcall is available.
+static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+ return TLI.getLibcallName(LC) != nullptr;
+}
+
+/// Return true if sincos libcall is available and can be used to combine sin
+/// and cos.
+static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
+ const TargetMachine &TM) {
+ if (!isSinCosLibcallAvailable(Node, TLI))
+ return false;
+ // GNU sin/cos functions set errno while sincos does not. Therefore
+ // combining sin and cos is only safe if unsafe-fpmath is enabled.
+ if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath)
+ return false;
+ return true;
+}
+
+/// Only issue sincos libcall if both sin and cos are needed.
+static bool useSinCos(SDNode *Node) {
+ unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
+ ? ISD::FCOS : ISD::FSIN;
+
+ SDValue Op0 = Node->getOperand(0);
+ for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+ UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == Node)
+ continue;
+ // The other user might have been turned into sincos already.
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS)
+ return true;
+ }
+ return false;
+}
+
+/// Issue libcalls to sincos to compute sin / cos pairs.
+void
+SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::SINCOS_F32; break;
+ case MVT::f64: LC = RTLIB::SINCOS_F64; break;
+ case MVT::f80: LC = RTLIB::SINCOS_F80; break;
+ case MVT::f128: LC = RTLIB::SINCOS_F128; break;
+ case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
+ }
+
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the necessary dependence on
+ // the previous call.
+ SDValue InChain = DAG.getEntryNode();
+
+ EVT RetVT = Node->getValueType(0);
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // Pass the argument.
+ Entry.Node = Node->getOperand(0);
+ Entry.Ty = RetTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Pass the return address of sin.
+ SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = SinPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ // Also pass the return address of the cos.
+ SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
+ Entry.Node = CosPtr;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ SDLoc dl(Node);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC),
+ Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args));
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo()));
+ Results.push_back(
+ DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
+}
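
The expansion above is the DAG-level equivalent of calling the sincos extension once with two out-pointers instead of calling sin and cos separately; a source-level sketch (sincos is a GNU/BSD libm extension, which is exactly what isSinCosLibcallAvailable checks for):

    #include <cmath>

    static void sinAndCos(double X, double &S, double &C) {
      double SinSlot, CosSlot;          // the two stack temporaries
      ::sincos(X, &SinSlot, &CosSlot);  // void sincos(double, double*, double*)
      S = SinSlot;
      C = CosSlot;
    }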
+
+/// This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it. At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
+ EVT DestVT,
+ const SDLoc &dl) {
+ // TODO: Should any fast-math-flags be set for the created nodes?
+
+ if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+ // Get the stack frame index of an 8-byte buffer.
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+ // word offset constant for Hi/Lo address computation
+ SDValue WordOff = DAG.getConstant(sizeof(int), dl,
+ StackSlot.getValueType());
+ // set up Hi and Lo (into buffer) address based on endian
+ SDValue Hi = StackSlot;
+ SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
+ StackSlot, WordOff);
+ if (DAG.getDataLayout().isLittleEndian())
+ std::swap(Hi, Lo);
+
+ // if signed map to unsigned space
+ SDValue Op0Mapped;
+ if (isSigned) {
+ // constant used to invert sign bit (signed to unsigned mapping)
+ SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32);
+ Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+ } else {
+ Op0Mapped = Op0;
+ }
+ // store the lo of the constructed double - based on integer input
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo,
+ MachinePointerInfo());
+ // initial hi portion of constructed double
+ SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32);
+ // store the hi of the constructed double - biased exponent
+ SDValue Store2 =
+ DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo());
+ // load the constructed double
+ SDValue Load =
+ DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo());
+ // FP constant to bias correct the final result
+ SDValue Bias = DAG.getConstantFP(isSigned ?
+ BitsToDouble(0x4330000080000000ULL) :
+ BitsToDouble(0x4330000000000000ULL),
+ dl, MVT::f64);
+ // subtract the bias
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ // final result
+ SDValue Result;
+ // handle final rounding
+ if (DestVT == MVT::f64) {
+ // do nothing
+ Result = Sub;
+ } else if (DestVT.bitsLT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+ DAG.getIntPtrConstant(0, dl));
+ } else if (DestVT.bitsGT(MVT::f64)) {
+ Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+ }
+ return Result;
+ }
+ assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
+
+ // Implementation of unsigned i64 to f64 following the algorithm in
+ // __floatundidf in compiler_rt. This implementation has the advantage
+ // of performing rounding correctly, both in the default rounding mode
+ // and in all alternate rounding modes.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ SDValue TwoP52 =
+ DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
+ SDValue TwoP84PlusTwoP52 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl,
+ MVT::f64);
+ SDValue TwoP84 =
+ DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64);
+
+ SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+ DAG.getConstant(32, dl, MVT::i64));
+ SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+ SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
+ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+ }
+
+ // Implementation of unsigned i64 to f32.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst = DAG.getConstant(
+ 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout()));
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
+ return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
+
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), dl, MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2,
+ DAG.getConstant(UINT64_C(0), dl, MVT::i64),
+ ISD::SETNE);
+ SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0,
+ DAG.getConstant(UINT64_C(0x0020000000000000), dl,
+ MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
+ EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout());
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, dl, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl,
+ MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()),
+ Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ ISD::SETLT);
+ SDValue Zero = DAG.getIntPtrConstant(0, dl),
+ Four = DAG.getIntPtrConstant(4, dl);
+ SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
+ SignSet, Four, Zero);
+
+ // If the sign bit of the integer is set, the large number will be treated
+ // as a negative number. To counteract this, the dynamic code adds an
+ // offset depending on the data type.
+ uint64_t FF;
+ switch (Op0.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unsupported integer type!");
+ case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
+ case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
+ case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
+ case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float)
+ }
+ if (DAG.getDataLayout().isLittleEndian())
+ FF <<= 32;
+ Constant *FudgeFactor = ConstantInt::get(
+ Type::getInt64Ty(*DAG.getContext()), FF);
+
+ SDValue CPIdx =
+ DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout()));
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
+ Alignment = std::min(Alignment, 4u);
+ SDValue FudgeInReg;
+ if (DestVT == MVT::f32)
+ FudgeInReg = DAG.getLoad(
+ MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ else {
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
+ }
+
+ return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
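
The 32-bit path above is the classic exponent-bias trick: splice the integer into the low word of the double whose bit pattern is 0x4330000000000000 (that is, 2^52), then subtract 2^52 so only the integer value remains. A scalar sketch of the unsigned case:

    #include <cstdint>
    #include <cstring>

    static double u32ToDouble(uint32_t X) {
      uint64_t Bits = (UINT64_C(0x43300000) << 32) | X;  // bits of 2^52 + X
      double D;
      std::memcpy(&D, &Bits, sizeof(D));
      return D - 4503599627370496.0;                     // subtract 2^52
    }

For the signed case the code additionally XORs the input with 0x80000000 and subtracts the bias 2^52 + 2^31 instead, which is where the constant 0x4330000080000000 comes from.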
+
+/// This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
+ bool isSigned,
+ const SDLoc &dl) {
+ // First step, figure out the appropriate *INT_TO_FP operation to use.
+ EVT NewInTy = LegalOp.getValueType();
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+ assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+ // If the target supports SINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::SINT_TO_FP;
+ break;
+ }
+ if (isSigned) continue;
+
+ // If the target supports UINT_TO_FP of this type, use it.
+ if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+ OpToUse = ISD::UINT_TO_FP;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+ // Okay, we found the operation and type to use. Zero extend our input to the
+ // desired type then run the operation on it.
+ return DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp));
+}
+
+/// This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it. At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
+ bool isSigned,
+ const SDLoc &dl) {
+ // First step, figure out the appropriate FP_TO*INT operation to use.
+ EVT NewOutTy = DestVT;
+
+ unsigned OpToUse = 0;
+
+ // Scan for the appropriate larger type to use.
+ while (1) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ // A larger signed type can hold all unsigned values of the requested type,
+ // so using FP_TO_SINT is valid
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_SINT;
+ break;
+ }
+
+ // However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ OpToUse = ISD::FP_TO_UINT;
+ break;
+ }
+
+ // Otherwise, try a larger type.
+ }
+
+
+ // Okay, we found the operation and type to use.
+ SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+ // Truncate the result of the extended FP_TO_*INT operation to the desired
+ // size.
+ return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// Open code the operations for BITREVERSE.
+SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ SDValue Tmp, Tmp2;
+ Tmp = DAG.getConstant(0, dl, VT);
+ for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
+ if (I < J)
+ Tmp2 =
+ DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
+ else
+ Tmp2 =
+ DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
+
+ APInt Shift(Sz, 1);
+ Shift = Shift.shl(J);
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
+ }
+
+ return Tmp;
+}
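
A scalar rendering of the loop above for a 32-bit value, showing that bit I of the input lands at bit Sz-1-I of the result:

    #include <cstdint>

    static uint32_t reverseBits32(uint32_t V) {
      uint32_t R = 0;
      for (unsigned I = 0; I < 32; ++I)
        R |= ((V >> I) & 1u) << (31 - I);   // move bit I to bit 31 - I
      return R;
    }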
+
+/// Open code the operations for BSWAP of the specified operation.
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
+ case MVT::i16:
+ Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ case MVT::i32:
+ Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(0xFF0000, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ case MVT::i64:
+ Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
+ Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
+ Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
+ Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
+ DAG.getConstant(255ULL<<48, dl, VT));
+ Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
+ DAG.getConstant(255ULL<<40, dl, VT));
+ Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
+ DAG.getConstant(255ULL<<32, dl, VT));
+ Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
+ DAG.getConstant(255ULL<<24, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
+ DAG.getConstant(255ULL<<16, dl, VT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
+ DAG.getConstant(255ULL<<8 , dl, VT));
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+ Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+ Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+ Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+ Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+ return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+ }
+}
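
The MVT::i32 case above is the usual four-shift byte swap; written as plain C++ it is:

    #include <cstdint>

    static uint32_t bswap32(uint32_t V) {
      return (V << 24) |
             ((V << 8) & 0x00FF0000u) |
             ((V >> 8) & 0x0000FF00u) |
             (V >> 24);
    }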
+
+/// Expand the specified bitcount instruction into operations.
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+ const SDLoc &dl) {
+ switch (Opc) {
+ default: llvm_unreachable("Cannot expand this yet!");
+ case ISD::CTPOP: {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)),
+ dl, VT);
+ SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)),
+ dl, VT);
+ SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)),
+ dl, VT);
+ SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)),
+ dl, VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, dl, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, dl, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, dl, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
+
+ return Op;
+ }
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
+ case ISD::CTLZ: {
+ EVT VT = Op.getValueType();
+ unsigned len = VT.getSizeInBits();
+
+ if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT = getSetCCResultType(VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(len, dl, VT), CTLZ);
+ }
+
+ // for now, we do this:
+ // x = x | (x >> 1);
+ // x = x | (x >> 2);
+ // ...
+ // x = x | (x >>16);
+ // x = x | (x >>32); // for 64-bit input
+ // return popcount(~x);
+ //
+ // Ref: "Hacker's Delight" by Henry Warren
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
+ Op = DAG.getNode(ISD::OR, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+ }
+ Op = DAG.getNOT(dl, Op, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ }
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
+ case ISD::CTTZ: {
+ // for now, we use: { return popcount(~x & (x - 1)); }
+ // unless the target has ctlz but not ctpop, in which case we use:
+ // { return 32 - nlz(~x & (x-1)); }
+ // Ref: "Hacker's Delight" by Henry Warren
+ EVT VT = Op.getValueType();
+ SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNOT(dl, Op, VT),
+ DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getConstant(1, dl, VT)));
+ // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+ if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(VT.getSizeInBits(), dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+ }
+ }
+}
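
Scalar versions of the two main expansions above, for a 32-bit value (sketches of the same bithacks, not code from the patch):

    #include <cstdint>

    static uint32_t popcount32(uint32_t V) {
      V = V - ((V >> 1) & 0x55555555u);                  // 2-bit sums
      V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u);  // 4-bit sums
      V = (V + (V >> 4)) & 0x0F0F0F0Fu;                  // 8-bit sums
      return (V * 0x01010101u) >> 24;                    // total in top byte
    }

    static uint32_t cttz32(uint32_t V) {
      return popcount32(~V & (V - 1));                   // CTTZ expansion
    }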
+
+bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ bool NeedInvert;
+ switch (Node->getOpcode()) {
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BITREVERSE:
+ Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl));
+ break;
+ case ISD::BSWAP:
+ Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
+ break;
+ case ISD::FRAMEADDR:
+ case ISD::RETURNADDR:
+ case ISD::FRAME_TO_ARGS_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ break;
+ case ISD::FLT_ROUNDS_:
+ Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0)));
+ break;
+ case ISD::EH_RETURN:
+ case ISD::EH_LABEL:
+ case ISD::PREFETCH:
+ case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::READCYCLECOUNTER:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.append(Node->getNumValues() - 1,
+ DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
+ Results.push_back(DAG.getConstant(0, dl, MVT::i32));
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::ATOMIC_LOAD: {
+ // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
+ SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0));
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(0));
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_STORE: {
+ // There is no libcall for atomic store; fake it with ATOMIC_SWAP.
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(Node)->getMemoryVT(),
+ Node->getOperand(0),
+ Node->getOperand(1), Node->getOperand(2),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+ Results.push_back(Swap.getValue(1));
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
+ // splits out the success value as a comparison. Expanding the resulting
+ // ATOMIC_CMP_SWAP will produce a libcall.
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getSuccessOrdering(),
+ cast<AtomicSDNode>(Node)->getFailureOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+
+ SDValue ExtRes = Res;
+ SDValue LHS = Res;
+ SDValue RHS = Node->getOperand(1);
+
+ EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT();
+ EVT OuterType = Node->getValueType(0);
+ switch (TLI.getExtendForAtomicOps()) {
+ case ISD::SIGN_EXTEND:
+ LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType,
+ Node->getOperand(2), DAG.getValueType(AtomicType));
+ ExtRes = LHS;
+ break;
+ case ISD::ZERO_EXTEND:
+ LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
+ DAG.getValueType(AtomicType));
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ ExtRes = LHS;
+ break;
+ case ISD::ANY_EXTEND:
+ LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
+ break;
+ default:
+ llvm_unreachable("Invalid atomic op extension");
+ }
+
+ SDValue Success =
+ DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ);
+
+ Results.push_back(ExtRes.getValue(0));
+ Results.push_back(Success);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
+ case ISD::DYNAMIC_STACKALLOC:
+ ExpandDYNAMIC_STACKALLOC(Node, Results);
+ break;
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0; i < Node->getNumValues(); i++)
+ Results.push_back(Node->getOperand(i));
+ break;
+ case ISD::UNDEF: {
+ EVT VT = Node->getValueType(0);
+ if (VT.isInteger())
+ Results.push_back(DAG.getConstant(0, dl, VT));
+ else {
+ assert(VT.isFloatingPoint() && "Unknown value type!");
+ Results.push_back(DAG.getConstantFP(0, dl, VT));
+ }
+ break;
+ }
+ case ISD::FP_ROUND:
+ case ISD::BITCAST:
+ Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ // NOTE: we could fall back on load/store here too for targets without
+ // SAR. However, it is doubtful that any exist.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ if (VT.isVector())
+ ShiftAmountTy = VT;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
+ Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
+ Node->getOperand(0), ShiftCst);
+ Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_ROUND_INREG: {
+ // The only way we can lower this is to turn it into a TRUNCSTORE,
+ // EXTLOAD pair, targeting a temporary location (a stack slot).
+
+ // NOTE: there is a choice here between constantly creating new stack
+ // slots and always reusing the same one. We currently always create
+ // new ones, as reuse may inhibit scheduling.
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
+ Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+ Node->getOperand(0), Node->getValueType(0), dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FP_TO_UINT: {
+ SDValue True, False;
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ APFloat apf(DAG.EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ APInt x = APInt::getSignBit(NVT.getSizeInBits());
+ (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
+ Tmp1 = DAG.getConstantFP(apf, dl, VT);
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
+ Node->getOperand(0),
+ Tmp1, ISD::SETLT);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
+ // TODO: Should any fast-math-flags be set for the FSUB?
+ False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT,
+ DAG.getNode(ISD::FSUB, dl, VT,
+ Node->getOperand(0), Tmp1));
+ False = DAG.getNode(ISD::XOR, dl, NVT, False,
+ DAG.getConstant(x, dl, NVT));
+ Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False);
+ Results.push_back(Tmp1);
+ break;
+ }
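
A scalar sketch of this FP_TO_UINT expansion for f32 -> i32: inputs below 2^31 take the plain signed conversion, larger inputs are rebased by 2^31 and the sign bit is XORed back in (in-range inputs assumed):

    #include <cstdint>

    static uint32_t fptoui32(float X) {
      const float Two31 = 2147483648.0f;                    // the APFloat Tmp1
      if (X < Two31)                                        // the SETLT select
        return (uint32_t)(int32_t)X;                        // "True" arm
      return (uint32_t)(int32_t)(X - Two31) ^ 0x80000000u;  // "False" arm
    }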
+ case ISD::VAARG:
+ Results.push_back(DAG.expandVAArg(Node));
+ Results.push_back(Results[0].getValue(1));
+ break;
+ case ISD::VACOPY:
+ Results.push_back(DAG.expandVACopy(Node));
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
+ // This must be an access of the only element. Return it.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ else
+ Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
+ case ISD::CONCAT_VECTORS: {
+ Results.push_back(ExpandVectorBuildThroughStack(Node));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR:
+ Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
+ break;
+ case ISD::INSERT_VECTOR_ELT:
+ Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getOperand(2), dl));
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not accept
+ // it.
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+ // Calculate the new VT; its size should be equal to that of the original VT.
+ EVT NewVT =
+ EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits() / NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned int factor =
+ NewVT.getVectorNumElements()/VT.getVectorNumElements();
+
+ // EltVT gets smaller
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ }
+ else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (Mask[i] < 0) {
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+ unsigned Idx = Mask[i];
+ if (Idx < NumElems)
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ else
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
+ DAG.getConstant(Idx - NumElems, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ EVT OpTy = Node->getOperand(0).getValueType();
+ if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ // 1 -> Hi
+ Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
+ DAG.getConstant(OpTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(
+ Node->getOperand(0).getValueType(),
+ DAG.getDataLayout())));
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
+ } else {
+ // 0 -> Lo
+ Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0),
+ Node->getOperand(0));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::STACKSAVE:
+ // Expand to CopyFromReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
+ Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(1));
+ } else {
+ Results.push_back(DAG.getUNDEF(Node->getValueType(0)));
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::STACKRESTORE:
+ // Expand to CopyToReg if the target set
+ // StackPointerRegisterToSaveRestore.
+ if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
+ Node->getOperand(1)));
+ } else {
+ Results.push_back(Node->getOperand(0));
+ }
+ break;
+ case ISD::GET_DYNAMIC_AREA_OFFSET:
+ Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0)));
+ Results.push_back(Results[0].getValue(0));
+ break;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ break;
+ case ISD::FNEG:
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
+ // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
+ Node->getOperand(0));
+ Results.push_back(Tmp1);
+ break;
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
+ break;
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: {
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ ISD::CondCode Pred;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX: Pred = ISD::SETGT; break;
+ case ISD::SMIN: Pred = ISD::SETLT; break;
+ case ISD::UMAX: Pred = ISD::SETUGT; break;
+ case ISD::UMIN: Pred = ISD::SETULT; break;
+ }
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred);
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ case ISD::FSIN:
+ case ISD::FCOS: {
+ EVT VT = Node->getValueType(0);
+ // Turn fsin / fcos into an ISD::FSINCOS node if there is a pair of fsin /
+ // fcos that share the same operand and both are used.
+ if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) ||
+ canCombineSinCosLibcall(Node, TLI, TM))
+ && useSinCos(Node)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::FCOS)
+ Tmp1 = Tmp1.getValue(1);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::FMAD:
+ llvm_unreachable("Illegal fmad should never be formed");
+
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) != MVT::f32) {
+ // We can extend to types bigger than f32 in two steps without changing
+ // the result. Since "f16 -> f32" is much more commonly available, give
+ // CodeGen the option of emitting that before resorting to a libcall.
+ SDValue Res =
+ DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res));
+ }
+ break;
+ case ISD::FP_TO_FP16:
+ if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ SDValue Op = Node->getOperand(0);
+ MVT SVT = Op.getSimpleValueType();
+ if ((SVT == MVT::f64 || SVT == MVT::f80) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) {
+ // Under fastmath, we can expand this node into a fround followed by
+ // a float-half conversion.
+ SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(
+ DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal));
+ }
+ }
+ break;
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
+ // Check to see if this FP immediate is already legal.
+ // If it is not, expand it into a load from the constant pool.
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
+ break;
+ }
+ case ISD::Constant: {
+ ConstantSDNode *CP = cast<ConstantSDNode>(Node);
+ Results.push_back(ExpandConstant(CP));
+ break;
+ }
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) {
+ const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags;
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::SUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ "Don't know how to expand this subtraction!");
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
+ VT));
+ Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
+ Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+ break;
+ }
+ case ISD::UREM:
+ case ISD::SREM: {
+ EVT VT = Node->getValueType(0);
+ bool isSigned = Node->getOpcode() == ISD::SREM;
+ unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ Tmp2 = Node->getOperand(0);
+ Tmp3 = Node->getOperand(1);
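+ // Prefer a combined divrem node and take its remainder result; otherwise
+ // compute the remainder as X - (X / Y) * Y.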
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+ Results.push_back(Tmp1);
+ } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+ // X % Y -> X-X/Y*Y
+ Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::UDIV:
+ case ISD::SDIV: {
+ bool isSigned = Node->getOpcode() == ISD::SDIV;
+ unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+ EVT VT = Node->getValueType(0);
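+ // If a combined divrem node is available, use its quotient result.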
+ if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1);
+ }
+ break;
+ }
+ case ISD::MULHU:
+ case ISD::MULHS: {
+ unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
+ ISD::SMUL_LOHI;
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
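+ // MULH is the high half of a full-width multiply, i.e. result 1 of the
+ // corresponding xMUL_LOHI node.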
+ assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
+ "If this wasn't legal, it shouldn't have been created!");
+ Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1));
+ Results.push_back(Tmp1.getValue(1));
+ break;
+ }
+ case ISD::MUL: {
+ EVT VT = Node->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, VT);
+ // See if the multiply can be lowered using two-result operations.
+ // We just need the low half of the multiply; try both the signed
+ // and unsigned forms. If the target supports both SMUL_LOHI and
+ // UMUL_LOHI, form a preference by checking which forms of plain
+ // MULH it supports.
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT);
+ bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT);
+ bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT);
+ bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT);
+ unsigned OpToUse = 0;
+ if (HasSMUL_LOHI && !HasMULHS) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI && !HasMULHU) {
+ OpToUse = ISD::UMUL_LOHI;
+ } else if (HasSMUL_LOHI) {
+ OpToUse = ISD::SMUL_LOHI;
+ } else if (HasUMUL_LOHI) {
+ OpToUse = ISD::UMUL_LOHI;
+ }
+ if (OpToUse) {
+ Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0),
+ Node->getOperand(1)));
+ break;
+ }
+
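+ // Otherwise, try expanding the multiply in a half-sized integer type and
+ // recombine the halves: zero-extend the low half, shift the high half into
+ // place, and OR them together.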
+ SDValue Lo, Hi;
+ EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext());
+ if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
+ TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
+ SDValue Shift =
+ DAG.getConstant(HalfType.getSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(Node->getValueType(0));
+
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ Results.push_back(Sum);
+
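+ // Unsigned overflow occurred exactly when the sum is less than the LHS for
+ // an add, or greater than the LHS for a subtract.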
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+ ISD::CondCode CC
+ = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
+ break;
+ }
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ EVT VT = Node->getValueType(0);
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(WideVT)) {
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0, dl));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1, dl));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ // The high part is obtained by SRA'ing all but one of the bits of the low
+ // part.
+ unsigned LoSize = VT.getSizeInBits();
+ SDValue HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+ // pre-lowered to the correct types. This all depends upon WideVT not
+ // being a legal type for the architecture, and thus having to be split
+ // into two arguments.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(0, dl));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+ DAG.getIntPtrConstant(1, dl));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, make sure the
+ // node has no more uses. The above EXTRACT_ELEMENT nodes should have been
+ // folded.
+ assert(Ret->use_empty() &&
+ "Unexpected uses of illegally type from expanded lib call.");
+ }
+
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(
+ VT.getSizeInBits() - 1, dl,
+ TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT PairTy = Node->getValueType(0);
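+ // Combine the two halves: zero-extend the low operand, shift the
+ // any-extended high operand into the upper half, and OR them together.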
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
+ Tmp2 = DAG.getNode(
+ ISD::SHL, dl, PairTy, Tmp2,
+ DAG.getConstant(PairTy.getSizeInBits() / 2, dl,
+ TLI.getShiftAmountTy(PairTy, DAG.getDataLayout())));
+ Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
+ break;
+ }
+ case ISD::SELECT:
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
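+ // Lower SELECT into SELECT_CC, either reusing the comparison from an
+ // existing SETCC condition or testing the condition against zero.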
+ if (Tmp1.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1),
+ Tmp2, Tmp3,
+ cast<CondCodeSDNode>(Tmp1.getOperand(2))->get());
+ } else {
+ Tmp1 = DAG.getSelectCC(dl, Tmp1,
+ DAG.getConstant(0, dl, Tmp1.getValueType()),
+ Tmp2, Tmp3, ISD::SETNE);
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::BR_JT: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Table = Node->getOperand(1);
+ SDValue Index = Node->getOperand(2);
+
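+ // Scale the index by the jump-table entry size, load the destination
+ // address from the table, and branch to it indirectly.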
+ EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
+
+ const DataLayout &TD = DAG.getDataLayout();
+ unsigned EntrySize =
+ DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EntrySize, dl, Index.getValueType()));
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
+ Index, Table);
+
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
+ SDValue LD = DAG.getExtLoad(
+ ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
+ Addr = LD;
+ if (TM.isPositionIndependent()) {
+ // For PIC, the sequence is:
+ // BRIND(load(Jumptable + index) + RelocBase)
+ // RelocBase can be JumpTable, GOT or some sort of global base.
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr,
+ TLI.getPICJumpTableRelocBase(Table, DAG));
+ }
+ Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BRCOND:
+ // Expand brcond's setcc into its constituent parts and create a BR_CC
+ // Node.
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ if (Tmp2.getOpcode() == ISD::SETCC) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
+ Tmp1, Tmp2.getOperand(2),
+ Tmp2.getOperand(0), Tmp2.getOperand(1),
+ Node->getOperand(2));
+ } else {
+ // We test only the i1 bit. Skip the AND if UNDEF.
+ Tmp3 = (Tmp2.isUndef()) ? Tmp2 :
+ DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, dl, Tmp2.getValueType()));
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
+ DAG.getCondCode(ISD::SETNE), Tmp3,
+ DAG.getConstant(0, dl, Tmp3.getValueType()),
+ Node->getOperand(2));
+ }
+ Results.push_back(Tmp1);
+ break;
+ case ISD::SETCC: {
+ Tmp1 = Node->getOperand(0);
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = Node->getOperand(2);
+ bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
+ Tmp3, NeedInvert, dl);
+
+ if (Legalized) {
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SETCC node.
+ if (Tmp3.getNode())
+ Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3);
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+
+ Results.push_back(Tmp1);
+ break;
+ }
+
+ // Otherwise, SETCC for the given comparison type must be completely
+ // illegal; expand it into a SELECT_CC.
+ EVT VT = Node->getValueType(0);
+ int TrueValue;
+ switch (TLI.getBooleanContents(Tmp1->getValueType(0))) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = 1;
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = -1;
+ break;
+ }
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
+ DAG.getConstant(TrueValue, dl, VT),
+ DAG.getConstant(0, dl, VT),
+ Tmp3);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SELECT_CC: {
+ Tmp1 = Node->getOperand(0); // LHS
+ Tmp2 = Node->getOperand(1); // RHS
+ Tmp3 = Node->getOperand(2); // True
+ Tmp4 = Node->getOperand(3); // False
+ EVT VT = Node->getValueType(0);
+ SDValue CC = Node->getOperand(4);
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) {
+ // If the condition code is legal, then we need to expand this
+ // node using SETCC and SELECT.
+ EVT CmpVT = Tmp1.getValueType();
+ assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
+ "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
+ "expanded.");
+ EVT CCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+ Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+ break;
+ }
+
+ // SELECT_CC is legal, so the condition code must not be.
+ bool Legalized = false;
+ // Try to legalize by inverting the condition. This is for targets that
+ // might support an ordered version of a condition, but not the unordered
+ // version (or vice versa).
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
+ Tmp1.getValueType().isInteger());
+ if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+ // Use the new condition code and swap true and false
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ } else {
+ // If the inverse is not legal, then try to swap the arguments using
+ // the inverse condition code.
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+ if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+ // The swapped inverse condition is legal, so swap true and false,
+ // lhs and rhs.
+ Legalized = true;
+ Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ }
+ }
+
+ if (!Legalized) {
+ Legalized = LegalizeSetCCCondCode(
+ getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+ dl);
+
+ assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then swap
+ // the True/False operands to match.
+ if (NeedInvert)
+ std::swap(Tmp3, Tmp4);
+
+ // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+ // condition code, create a new SELECT_CC node.
+ if (CC.getNode()) {
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Tmp3, Tmp4, CC);
+ } else {
+ Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType());
+ CC = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Tmp3, Tmp4, CC);
+ }
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BR_CC: {
+ Tmp1 = Node->getOperand(0); // Chain
+ Tmp2 = Node->getOperand(2); // LHS
+ Tmp3 = Node->getOperand(3); // RHS
+ Tmp4 = Node->getOperand(1); // CC
+
+ bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+ Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+ (void)Legalized;
+ assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+ // If we expanded the SETCC by inverting the condition code, then wrap
+ // the existing SETCC in a NOT to restore the intended condition.
+ if (NeedInvert)
+ Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+
+ // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+ // node.
+ if (Tmp4.getNode()) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+ Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+ } else {
+ Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
+ Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
+ Tmp2, Tmp3, Node->getOperand(4));
+ }
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Sh = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1),
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ case ISD::GLOBAL_OFFSET_TABLE:
+ case ISD::GlobalAddress:
+ case ISD::GlobalTLSAddress:
+ case ISD::ExternalSymbol:
+ case ISD::ConstantPool:
+ case ISD::JumpTable:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // FIXME: Custom lowering for these operations shouldn't return null!
+ break;
+ }
+
+ // Replace the original node with the legalized result.
+ if (Results.empty())
+ return false;
+
+ ReplaceNode(Node, Results.data());
+ return true;
+}
+
+void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+ unsigned Opc = Node->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_FENCE: {
+ // If the target didn't lower this, lower it to a '__sync_synchronize()' call.
+ // FIXME: handle "fence singlethread" more efficiently.
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
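+ // Map the atomic opcode and memory width to the matching __sync_* libcall.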
+ RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
+
+ std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::TRAP: {
+ // If this operation is not supported, lower it to an 'abort()' call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("abort",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+
+ Results.push_back(CallResult.second);
+ break;
+ }
+ case ISD::FMINNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
+ break;
+ case ISD::FMAXNUM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
+ break;
+ case ISD::FSQRT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
+ break;
+ case ISD::FSIN:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
+ break;
+ case ISD::FCOS:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
+ break;
+ case ISD::FSINCOS:
+ // Expand into sincos libcall.
+ ExpandSinCosLibCall(Node, Results);
+ break;
+ case ISD::FLOG:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
+ break;
+ case ISD::FLOG2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
+ break;
+ case ISD::FLOG10:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
+ break;
+ case ISD::FEXP:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
+ break;
+ case ISD::FEXP2:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
+ break;
+ case ISD::FTRUNC:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
+ break;
+ case ISD::FFLOOR:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
+ break;
+ case ISD::FCEIL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
+ break;
+ case ISD::FRINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
+ break;
+ case ISD::FNEARBYINT:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
+ break;
+ case ISD::FROUND:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
+ break;
+ case ISD::FPOWI:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128));
+ break;
+ case ISD::FPOW:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
+ break;
+ case ISD::FDIV:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
+ break;
+ case ISD::FREM:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
+ break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128));
+ break;
+ case ISD::FADD:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+ break;
+ case ISD::FMUL:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
+ break;
+ case ISD::FP16_TO_FP:
+ if (Node->getValueType(0) == MVT::f32) {
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ }
+ break;
+ case ISD::FP_TO_FP16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
+ case ISD::FSUB:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
+ break;
+ case ISD::SREM:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128));
+ break;
+ case ISD::UREM:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128));
+ break;
+ case ISD::SDIV:
+ Results.push_back(ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ break;
+ case ISD::UDIV:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ break;
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ // Expand into divrem libcall
+ ExpandDivRemLibCall(Node, Results);
+ break;
+ case ISD::MUL:
+ Results.push_back(ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128));
+ break;
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
+}
+
+// Determine the vector type to use in place of an original scalar element when
+// promoting equally sized vectors.
+static MVT getPromotedVectorElementType(const TargetLowering &TLI,
+ MVT EltVT, MVT NewEltVT) {
+ unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits();
+ MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt);
+ assert(TLI.isTypeLegal(MidVT) && "unexpected");
+ return MidVT;
+}
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
+ MVT OVT = Node->getSimpleValueType(0);
+ if (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::SETCC ||
+ Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ OVT = Node->getOperand(0).getSimpleValueType();
+ }
+ if (Node->getOpcode() == ISD::BR_CC)
+ OVT = Node->getOperand(2).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
+ SDLoc dl(Node);
+ SDValue Tmp1, Tmp2, Tmp3;
+ switch (Node->getOpcode()) {
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ // Zero extend the argument.
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ if (Node->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(),
+ OVT.getSizeInBits());
+ Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1,
+ DAG.getConstant(TopBit, dl, NVT));
+ }
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
+ Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
+ DAG.getConstant(NVT.getSizeInBits() -
+ OVT.getSizeInBits(), dl, NVT));
+ }
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ break;
+ case ISD::BSWAP: {
+ unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
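+ // Byte-swapping the zero-extended value leaves the interesting bytes in
+ // the high end of the wider type; shift them back down afterwards.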
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::FP_TO_SINT, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
+ Node->getOpcode() == ISD::SINT_TO_FP, dl);
+ Results.push_back(Tmp1);
+ break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ if (UpdatedNodes) {
+ UpdatedNodes->insert(Tmp2.getNode());
+ UpdatedNodes->insert(Chain.getNode());
+ }
+ ReplacedNode(Node);
+ break;
+ }
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ unsigned ExtOp, TruncOp;
+ if (OVT.isVector()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger() && "Cannot promote logic operation");
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ }
+ // Promote each of the values to the new type.
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
+ break;
+ }
+ case ISD::SELECT: {
+ unsigned ExtOp, TruncOp;
+ if (Node->getValueType(0).isVector() ||
+ Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
+ } else if (Node->getValueType(0).isInteger()) {
+ ExtOp = ISD::ANY_EXTEND;
+ TruncOp = ISD::TRUNCATE;
+ } else {
+ ExtOp = ISD::FP_EXTEND;
+ TruncOp = ISD::FP_ROUND;
+ }
+ Tmp1 = Node->getOperand(0);
+ // Promote each of the values to the new type.
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ // Perform the larger operation, then round down.
+ Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+ if (TruncOp != ISD::FP_ROUND)
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
+ else
+ Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1,
+ DAG.getIntPtrConstant(0, dl));
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
+
+ // Cast the two input vectors.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
+
+ // Convert the shuffle mask to the right # elements.
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
+ case ISD::SETCC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+ Tmp1, Tmp2, Node->getOperand(2)));
+ break;
+ }
+ case ISD::BR_CC: {
+ unsigned ExtOp = ISD::FP_EXTEND;
+ if (NVT.isInteger()) {
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(Node->getOperand(1))->get();
+ ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ }
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0),
+ Node->getOperand(0), Node->getOperand(1),
+ Tmp1, Tmp2, Node->getOperand(4)));
+ break;
+ }
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
+ Node->getFlags());
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0, dl)));
+ break;
+ }
+ case ISD::FMA: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
+ DAG.getIntPtrConstant(0, dl)));
+ break;
+ }
+ case ISD::FCOPYSIGN:
+ case ISD::FPOWI: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = Node->getOperand(1);
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+
+ // fcopysign doesn't change anything but the sign bit, so
+ // (fp_round (fcopysign (fpext a), b))
+ // is as precise as
+ // (fp_round (fpext a))
+ // which is a no-op. Mark it as a TRUNCating FP_ROUND.
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
+ break;
+ }
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FTRUNC:
+ case ISD::FNEG:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FABS:
+ case ISD::FEXP:
+ case ISD::FEXP2: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0, dl)));
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = build_vector i64:x, i64:y => v4i32
+ // =>
+ // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for build_vector");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) {
+ SDValue Op = Node->getOperand(I);
+ NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op));
+ }
+
+ SDLoc SL(Node);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+ // (i32 (extract_vector_elt castx, (2 * y + 1))))
+ //
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for extract_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Idx = Node->getOperand(1);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+
+ SmallVector<SDValue, 8> NewOps;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVec, TmpIdx);
+ NewOps.push_back(Elt);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size
+ //
+ // e.g. v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ // v2i32:casty = bitcast y:i64
+ //
+ // v2i64 = bitcast
+ // (v4i32 insert_vector_elt
+ // (v4i32 insert_vector_elt v4i32:castx,
+ // (extract_vector_elt casty, 0), 2 * z),
+ // (extract_vector_elt casty, 1), (2 * z + 1))
+
+ assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() &&
+ "Invalid promote type for insert_vector_elt");
+ assert(NewEltVT.bitsLT(EltVT) && "not handled");
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ unsigned NewEltsPerOldElt = MidVT.getVectorNumElements();
+
+ SDValue Val = Node->getOperand(1);
+ SDValue Idx = Node->getOperand(2);
+ EVT IdxVT = Idx.getValueType();
+ SDLoc SL(Node);
+
+ SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT);
+ SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor);
+
+ SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0));
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+
+ SDValue NewVec = CastVec;
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT);
+ SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset);
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT,
+ CastVal, IdxOffset);
+
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT,
+ NewVec, Elt, InEltIdx);
+ }
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec));
+ break;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT EltVT = OVT.getVectorElementType();
+ MVT NewEltVT = NVT.getVectorElementType();
+
+ // Handle bitcasts to a different vector type with the same total bit size.
+ //
+ // e.g. v2i64 = scalar_to_vector x:i64
+ // =>
+ // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef)
+ //
+
+ MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT);
+ SDValue Val = Node->getOperand(0);
+ SDLoc SL(Node);
+
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val);
+ SDValue Undef = DAG.getUNDEF(MidVT);
+
+ SmallVector<SDValue, 8> NewElts;
+ NewElts.push_back(CastVal);
+ for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
+ NewElts.push_back(Undef);
+
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
+ SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
+ Results.push_back(CvtVec);
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
+}
+
+/// This is the entry point for the file: legalize every node in the DAG.
+void SelectionDAG::Legalize() {
+ AssignTopologicalOrder();
+
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes);
+
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (auto NI = allnodes_end(); NI != allnodes_begin();) {
+ --NI;
+
+ SDNode *N = &*NI;
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ continue;
+ }
+
+ if (LegalizedNodes.insert(N).second) {
+ AnyLegalized = true;
+ Legalizer.LegalizeOp(N);
+
+ if (N->use_empty() && N != getRoot().getNode()) {
+ ++NI;
+ DeleteNode(N);
+ }
+ }
+ }
+ if (!AnyLegalized)
+ break;
+
+ }
+
+ // Remove dead nodes now.
+ RemoveDeadNodes();
+}
+
+bool SelectionDAG::LegalizeOp(SDNode *N,
+ SmallSetVector<SDNode *, 16> &UpdatedNodes) {
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
+ SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes);
+
+ // Directly insert the node in question, and legalize it. This will recurse
+ // as needed through operands.
+ LegalizedNodes.insert(N);
+ Legalizer.LegalizeOp(N);
+
+ return LegalizedNodes.count(N);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
new file mode 100644
index 000000000000..31ebf7bbec13
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -0,0 +1,2139 @@
+//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements float type expansion and softening for LegalizeTypes.
+// Softening is the act of turning a computation in an illegal floating point
+// type into a computation in an integer type of the same size; also known as
+// "soft float". For example, turning f32 arithmetic into operations using i32.
+// The resulting integer value is the same as what you would get by performing
+// the floating point operation and bitcasting the result to the integer type.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. For example,
+// implementing ppcf128 arithmetic in two f64 registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+/// GetFPLibCall - Return the right libcall for the given floating point type.
+static RTLIB::Libcall GetFPLibCall(EVT VT,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ return
+ VT == MVT::f32 ? Call_F32 :
+ VT == MVT::f64 ? Call_F64 :
+ VT == MVT::f80 ? Call_F80 :
+ VT == MVT::f128 ? Call_F128 :
+ VT == MVT::ppcf128 ? Call_PPCF128 :
+ RTLIB::UNKNOWN_LIBCALL;
+}
+
+//===----------------------------------------------------------------------===//
+// Convert Float Results to Integer for Non-HW-supported Operations.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften the result of this operator!");
+
+ case ISD::Register:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ assert(isLegalInHWReg(N->getValueType(ResNo)) &&
+ "Unsupported SoftenFloatRes opcode!");
+ // Only when isLegalInHWReg returns true can we skip checking the operands.
+ R = SDValue(N, ResNo);
+ break;
+ case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
+ case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
+ case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
+ case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
+ case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
+ case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
+ case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
+ case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode()) {
+ SetSoftenedFloat(SDValue(N, ResNo), R);
+ ReplaceSoftenFloatResult(N, ResNo, R);
+ }
+ // Return true only if the node is changed,
+ // assuming that the operands are also converted when necessary.
+ // Otherwise, return false to tell the caller to scan the operands.
+ return R.getNode() && R.getNode() != N;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ return BitConvertToInteger(N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return BitConvertToInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
+ // Convert the inputs to integers, and build a new pair out of them.
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)),
+ BitConvertToInteger(N->getOperand(0)),
+ BitConvertToInteger(N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
+ // When the type is legal in a hardware register, the FP constant is
+ // better loaded directly from the constant pool, so leave it unsoftened.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
+ // In ppcf128, the high 64 bits are always first in memory regardless
+ // of Endianness. LLVM's APFloat representation is not Endian sensitive,
+ // and so always converts into a 128-bit APInt in a non-Endian-sensitive
+ // way. However, APInt's are serialized in an Endian-sensitive fashion,
+ // so on big-Endian targets, the two doubles are output in the wrong
+ // order. Fix this by manually flipping the order of the high 64 bits
+ // and the low 64 bits here.
+ if (DAG.getDataLayout().isBigEndian() &&
+ CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) {
+ uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1],
+ CN->getValueAPF().bitcastToAPInt().getRawData()[0] };
+ APInt Val(128, words);
+ return DAG.getConstant(Val, SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ } else {
+ return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ CN->getValueType(0)));
+ }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ NewOp.getValueType().getVectorElementType(),
+ NewOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
+ // When the type is legal in a hardware register, FABS can be implemented
+ // with native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Size = NVT.getSizeInBits();
+
+ // Mask = ~(1 << (Size-1))
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size - 1);
+ SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
+ // When the type is legal in a hardware register, FCOPYSIGN can be
+ // implemented with native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ SDValue LHS = GetSoftenedFloat(N->getOperand(0));
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
+
+ EVT LVT = LHS.getValueType();
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+ // First get the sign bit of the second operand.
+ SDValue SignBit = DAG.getNode(
+ ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT),
+ DAG.getConstant(RSize - 1, dl,
+ TLI.getShiftAmountTy(RVT, DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit);
+
+ // Shift the sign bit into the sign position of the first operand's type
+ // if the two operands have different sizes.
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits();
+ if (SizeDiff > 0) {
+ SignBit =
+ DAG.getNode(ISD::SRL, dl, RVT, SignBit,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit);
+ } else if (SizeDiff < 0) {
+ SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit);
+ SignBit =
+ DAG.getNode(ISD::SHL, dl, LVT, SignBit,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(SignBit.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ // Clear the sign bit of the first operand.
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT),
+ DAG.getConstant(LSize - 1, dl,
+ TLI.getShiftAmountTy(LVT, DAG.getDataLayout())));
+ Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT));
+ LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask);
+
+ // Or the value with the sign bit.
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
+ // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ // Expand Y = FNEG(X) -> Y = SUB -0.0, X
+ SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
+ GetSoftenedFloat(N->getOperand(0)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, false, dl).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
+ if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
+ SoftenFloatResult(Op.getNode(), 0);
+ }
+
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
+ Op = GetPromotedFloat(Op);
+ // If the promotion did the FP_EXTEND to the destination type for us,
+ // there's nothing left to do here.
+ if (Op.getValueType() == N->getValueType(0)) {
+ return BitConvertToInteger(Op);
+ }
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
+ Op = GetSoftenedFloat(Op);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+}
+
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
+ EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
+ SDValue Op = N->getOperand(0);
+ SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
+ false, SDLoc(N)).first;
+ if (N->getValueType(0) == MVT::f32)
+ return Res32;
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f16) {
+ // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
+ // storage-only type get a chance to select things.
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
+ return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ "Unsupported power type!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)) };
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ NVT, Ops, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->getValueType(0) == MVT::f16)
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
+ bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ auto MMOFlags =
+ L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ SDValue NewL;
+ if (L->getExtensionType() == ISD::NON_EXTLOAD) {
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
+ L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags,
+ L->getAAInfo());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ if (N != NewL.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ return NewL;
+ }
+
+ // Do a non-extending load followed by FP_EXTEND.
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(),
+ dl, L->getChain(), L->getBasePtr(), L->getOffset(),
+ L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(),
+ MMOFlags, L->getAAInfo());
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
+ if (LegalInHWReg)
+ return ExtendNode;
+ return BitConvertToInteger(ExtendNode);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
+ if (isLegalInHWReg(N->getValueType(ResNo)))
+ return SDValue(N, ResNo);
+ SDValue LHS = GetSoftenedFloat(N->getOperand(2));
+ SDValue RHS = GetSoftenedFloat(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ SDValue NewVAARG;
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ if (N != NewVAARG.getValue(1).getNode())
+ ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1));
+ return NewVAARG;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the input is not legal, e.g. i1 -> fp, then it needs to be promoted to
+ // a larger type, e.g. i8 -> fp. Even if it is legal, no libcall may exactly
+ // match. Look for an appropriate libcall.
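+ // The loop below walks the integer types from narrowest to widest and stops
+ // at the first one that is wide enough for the operand and has a libcall.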
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) {
+ NVT = (MVT::SimpleValueType)t;
+ // The source type needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT) : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC,
+ TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ Op, Signed, dl).first;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Convert Float Operand to Integer for Non-HW-supported Operations.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+ if (CanSkipSoftenFloatOperand(N, OpNo))
+ return false;
+#ifndef NDEBUG
+ dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to soften this operator's operand!");
+
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
+ case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
+ case ISD::STORE:
+ Res = SoftenFloatOp_STORE(N, OpNo);
+ // Do not try to analyze or soften this node again if the value is
+ // or can be held in a register. In that case, Res.getNode() should
+ // be equal to N.
+ if (Res.getNode() == N &&
+ isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // Otherwise, we need to reanalyze and lower the new Res nodes.
+ break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this to re-analyze.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
+ if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
+ return false;
+ // When the operand type can be kept in registers, SoftenFloatResult
+ // will call ReplaceValueWith to replace all references and we can
+ // skip softening this operand.
+ switch (N->getOperand(OpNo).getOpcode()) {
+ case ISD::BITCAST:
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ case ISD::SELECT:
+ case ISD::SELECT_CC:
+ return true;
+ }
+ // For some opcodes, SoftenFloatResult handles all of the softening and
+ // operand replacement itself, so there is no need to soften the operands
+ // again, although such a node may still be scanned for other illegal operands.
+ switch (N->getOpcode()) {
+ case ISD::ConstantFP:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::FABS:
+ case ISD::FCOPYSIGN:
+ case ISD::FNEG:
+ case ISD::Register:
+ case ISD::SELECT:
+ return true;
+ }
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
+ GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
+ // If we get here, the result must be legal but the source illegal.
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+ if (SVT == MVT::f16)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
+
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+}
+
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+ // We actually deal with the partially-softened FP_TO_FP16 node too, which
+ // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
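+ // For FP_TO_FP16 the result type is i16, but the libcall is chosen by the
+ // equivalent floating point type, f16.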
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
+ EVT SVT = N->getOperand(0).getValueType();
+ EVT RVT = N->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(N);
+
+ // If the result is not legal, e.g. fp -> i1, then it needs to be promoted to
+ // a larger type, e.g. fp -> i32. Even if it is legal, no libcall may exactly
+ // match, e.g. we don't have fp -> i8 conversions.
+ // Look for an appropriate libcall.
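+ // The loop below walks the integer types from narrowest to widest and stops
+ // at the first one that can hold the result and has a libcall.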
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+ // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+
+ // Truncate the result if the libcall returns a larger type.
+ return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ EVT VT = NewLHS.getValueType();
+ NewLHS = GetSoftenedFloat(NewLHS);
+ NewRHS = GetSoftenedFloat(NewRHS);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only soften the stored value!");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc dl(N);
+
+ if (ST->isTruncatingStore())
+ // Do an FP_ROUND followed by a non-truncating store.
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+ Val, DAG.getIntPtrConstant(0, dl)));
+ else
+ Val = GetSoftenedFloat(Val);
+
+ return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+ case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+ case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+ case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ assert(NVT.getSizeInBits() == integerPartWidth &&
+ "Do not know how to expand this float constant!");
+ APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+ SDLoc dl(N);
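+ // The two 64-bit halves of the bit pattern are the two doubles that make up
+ // the ppcf128 value.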
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[1])),
+ dl, NVT);
+ Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(integerPartWidth, C.getRawData()[0])),
+ dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDLoc dl(N);
+ SDValue Tmp;
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+ Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+ // Lo = Hi==fabs(Hi) ? Lo : -Lo;
+ Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo),
+ ISD::SETEQ);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128),
+ N->getValueType(0), Ops, false,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, false,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128),
+ N->getValueType(0), Ops, false,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
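+ // The extended value fits exactly in the high double of the expanded pair,
+ // so the low double is zero.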
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128),
+ N->getValueType(0), Ops, false,
+ SDLoc(N)).first;
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128),
+ N, false);
+ GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getMemoryVT(), LD->getMemOperand());
+
+ // Remember the chain.
+ Chain = Hi.getValue(1);
+
+ // The low part is zero.
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+
+ // Modified the chain - switch anything that used the old chain to use the
+ // new one.
+ ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ SDLoc dl(N);
+
+ // First do an SINT_TO_FP, whether the original was signed or unsigned.
+ // When promoting partial word types to i32 we must honor the signedness,
+ // though.
+ if (SrcVT.bitsLE(MVT::i32)) {
+ // The integer can be represented exactly in an f64.
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i32, Src);
+ Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
+ APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+ Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ } else {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (SrcVT.bitsLE(MVT::i64)) {
+ Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ MVT::i64, Src);
+ LC = RTLIB::SINTTOFP_I64_PPCF128;
+ } else if (SrcVT.bitsLE(MVT::i128)) {
+ Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src);
+ LC = RTLIB::SINTTOFP_I128_PPCF128;
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
+ GetPairElements(Hi, Lo, Hi);
+ }
+
+ if (isSigned)
+ return;
+
+ // Unsigned - fix up the SINT_TO_FP value just calculated.
+ Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
+ SrcVT = Src.getValueType();
+
+ // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128.
+ static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 };
+ static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 };
+ static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 };
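+ // Each constant is 2^N as a ppcf128 value, i.e. the double-double pair
+ // (2^N, 0.0); 0x41f0000000000000 is 2^32 encoded as an IEEE-754 double.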
+ ArrayRef<uint64_t> Parts;
+
+ switch (SrcVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+ case MVT::i32:
+ Parts = TwoE32;
+ break;
+ case MVT::i64:
+ Parts = TwoE64;
+ break;
+ case MVT::i128:
+ Parts = TwoE128;
+ break;
+ }
+
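+ // If the source is negative when interpreted as a signed value, the
+ // SINT_TO_FP result above is 2^N too small, so add 2^N back and select on
+ // the sign of the source.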
+ // TODO: Are there fast-math-flags to propagate to this FADD?
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ APInt(128, Parts)),
+ dl, MVT::ppcf128));
+ Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
+ Lo, Hi, ISD::SETLT);
+ GetPairElements(Lo, Lo, Hi);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Float Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatOperand - This method is called when the specified operand of the
+/// specified node is found to need expansion. At this point, all of the result
+/// types of the node are known to be legal, but other operands of the node may
+/// need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+ case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
+ case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
+ case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
+ case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+ GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+ assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!");
+
+ // FIXME: This generated code sucks. We want to generate
+ // FCMPU crN, hi1, hi2
+ // BNE crN, L:
+ // FCMPU crN, lo1, lo2
+ // The following can be improved, but not that much.
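+ // The expansion below computes
+ //   (Hi1 == Hi2 && Lo1 CC Lo2) || (Hi1 != Hi2 && Hi1 CC Hi2).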
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETOEQ);
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, CCCode);
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETUNE);
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode);
+ Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+ NewRHS = SDValue(); // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If FloatExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
+ assert(N->getOperand(1).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(1), Lo, Hi);
+ // The ppcf128 value is providing only the sign; take it from the
+ // higher-order double (which must have the larger magnitude).
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N),
+ N->getValueType(0), N->getOperand(0), Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Lo, Hi;
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+ N->getOperand(0), DAG.getValueType(MVT::f64));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+ DAG.getIntPtrConstant(1, dl));
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available). FIXME: Do this in a less hacky way.
+ if (RVT == MVT::i32) {
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
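+ // 0x41e0000000000000 is 2^31 encoded as an IEEE-754 double; the second
+ // double of the ppcf128 pair is zero.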
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ // TODO: Are there fast-math-flags to propagate to this FSUB?
+ return DAG.getSelectCC(dl, N->getOperand(0), Tmp,
+ DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ DAG.getNode(ISD::FSUB, dl,
+ MVT::ppcf128,
+ N->getOperand(0),
+ Tmp)),
+ DAG.getConstant(0x80000000, dl,
+ MVT::i32)),
+ DAG.getNode(ISD::FP_TO_SINT, dl,
+ MVT::i32, N->getOperand(0)),
+ ISD::SETGE);
+ }
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
+ false, dl).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If FloatExpandSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If FloatExpandSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+ ST->getValue().getValueType());
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+ (void)NVT;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(ST->getValue(), Lo, Hi);
+
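+ // The high double of the ppcf128 pair carries the value rounded to double
+ // precision, so it is enough to truncating-store Hi.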
+ return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Operand Promotion
+//===----------------------------------------------------------------------===//
+
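+// Returns the conversion opcode used between the given types: FP16_TO_FP when
+// extending an f16 operand, FP_TO_FP16 when the result type is f16.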
+static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ return ISD::FP16_TO_FP;
+ } else if (RetVT == MVT::f16) {
+ return ISD::FP_TO_FP16;
+ }
+
+ report_fatal_error("Attempt at an invalid promotion-related conversion");
+}
+
+bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+ SDValue R = SDValue();
+
+ // Nodes that use a promotion-requiring floating point operand, but don't
+ // produce a promotion-requiring floating point result, need to be legalized
+ // to use the promoted float operand. Nodes that produce at least one
+ // promotion-requiring floating point result have their operands legalized as
+ // a part of PromoteFloatResult.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+ case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
+ case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
+ }
+
+ if (R.getNode())
+ ReplaceValueWith(SDValue(N, 0), R);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op->getValueType(0);
+
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ assert(IVT == N->getValueType(0) && "Bitcast to type of different size");
+
+ SDValue Promoted = GetPromotedFloat(N->getOperand(0));
+ EVT PromotedVT = Promoted->getValueType(0);
+
+ // Convert the promoted float value to the desired IVT.
+ return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT,
+ Promoted);
+}
+
+// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by
+// PromoteFloatRes_FCOPYSIGN.
+SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Only operand 1 needs promotion here");
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), Op1);
+}
+
+// Convert the promoted float value to the desired integer type
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ // Desired VT is the same as the promoted type. Use the promoted float directly.
+ if (VT == Op->getValueType(0))
+ return Op;
+
+ // Else, extend the promoted float value to the desired VT.
+ return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
+}
+
+// Promote the float operands used for comparison. The true- and false-
+// operands have the same type as the result and are promoted, if needed, by
+// PromoteFloatRes_SELECT_CC
+SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ SDValue LHS = GetPromotedFloat(N->getOperand(0));
+ SDValue RHS = GetPromotedFloat(N->getOperand(1));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ LHS, RHS, N->getOperand(2), N->getOperand(3),
+ N->getOperand(4));
+}
+
+// Construct a SETCC that compares the promoted values and sets the conditional
+// code.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+ return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+}
+
+// Lower the promoted float down to the integer value of the same size and
+// construct a STORE of the integer value.
+SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Val = ST->getValue();
+ SDLoc DL(N);
+
+ SDValue Promoted = GetPromotedFloat(Val);
+ EVT VT = ST->getOperand(1)->getValueType(0);
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ SDValue NewVal;
+ NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL,
+ IVT, Promoted);
+
+ return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(),
+ ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ // These opcodes cannot appear if promotion of FP16 is done in the backend
+ // instead of in Clang.
+ case ISD::FP16_TO_FP:
+ case ISD::FP_TO_FP16:
+ default:
+ llvm_unreachable("Do not know how to promote this operator's result!");
+
+ case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
+ case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break;
+
+ // Unary FP Operations
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break;
+
+ // Binary FP Operations
+ case ISD::FADD:
+ case ISD::FDIV:
+ case ISD::FMAXNAN:
+ case ISD::FMINNAN:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break;
+
+ case ISD::FMA: // FMA is the same as FMAD
+ case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;
+
+ case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break;
+
+ case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
+ case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
+ case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
+ case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
+ }
+
+ if (R.getNode())
+ SetPromotedFloat(SDValue(N, ResNo), R);
+}
+
+ // Bitcast from i16 to f16: convert the i16 to an f32 value instead.
+ // At this point, it is not possible to determine whether the bitcast value is
+ // eventually stored to memory or promoted to f32 or to a higher-precision
+ // floating-point type. Some of these cases are handled by the FP_EXTEND and
+ // STORE promotion handlers.
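+ // E.g. (f16 (bitcast i16:x)) becomes (f32 (FP16_TO_FP i16:x)) on a target
+ // that promotes f16 to f32; the value is then carried as f32 until a store
+ // or an explicit rounding narrows it again.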
+SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT,
+ N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
+ ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Get the (bit-cast) APInt of the APFloat and build an integer constant
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL,
+ IVT);
+
+ // Convert the Constant to the desired FP type
+ // FIXME We might be able to do the conversion during compilation and get rid
+ // of it from the object code
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C);
+}
+
+ // If the Index operand is a constant, try to redirect the extract operation to
+ // the correct legalized vector. If not, bit-convert the input vector to an
+ // equivalent integer vector. Extract the element as a (bit-cast) integer
+ // value and convert it to the promoted type.
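+ // E.g. extracting an element of a v4f16 with a non-constant index goes
+ // through the bitcast v4i16: extract the i16 element and convert it to the
+ // promoted f32 type (assuming f16 is promoted to f32).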
+SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc DL(N);
+
+ // If the index is constant, try to extract the value from the legalized
+ // vector type.
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ switch (getTypeAction(VecVT)) {
+ default: break;
+ case TargetLowering::TypeScalarizeVector: {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeWidenVector: {
+ Vec = GetWidenedVector(Vec);
+ SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+ case TargetLowering::TypeSplitVector: {
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ SDValue Res;
+ if (IdxVal < LoElts)
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx);
+ else
+ Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, DL,
+ Idx.getValueType()));
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+ }
+
+ }
+ }
+
+ // Bit-convert the input vector to the equivalent integer vector
+ SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+ EVT IVT = NewOp.getValueType().getVectorElementType();
+
+ // Extract the element as a (bit-cast) integer value
+ SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT,
+ NewOp, N->getOperand(1));
+
+ // Convert the element to the desired FP type
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal);
+}
+
+// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result
+// needs promotion, so does the argument X. Note that Y, if needed, will be
+// handled during operand promotion.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Unary operation where the result and the operand have PromoteFloat type
+// action. Construct a new SDNode with the promoted float value of the old
+// operand.
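+ // E.g. (f16 (fsqrt %x)) becomes an f32 fsqrt of the promoted operand; no
+ // rounding back to half precision is inserted here.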
+SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op);
+}
+
+// Binary operations where the result and both operands have PromoteFloat type
+// action. Construct a new SDNode with the promoted float values of the old
+// operands.
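+ // E.g. (f16 (fadd %a, %b)) is computed as an f32 fadd of the promoted
+ // operands; the result is rounded only when it is stored or explicitly
+ // converted.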
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+ SDValue Op2 = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2);
+}
+
+// Promote the Float (first) operand and retain the Integer (second) operand
+SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+ SDValue Op1 = N->getOperand(1);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Explicit operation to reduce precision. Reduce the value to half precision
+// and promote it back to the legal type.
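+ // E.g. (f16 (fp_round f32:%x)) becomes FP_TO_FP16 of %x (the rounded 16-bit
+ // pattern) followed by FP16_TO_FP back to f32, so the result has genuine
+ // half precision even though it is carried in an f32 (assuming the usual
+ // f16-to-f32 promotion).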
+SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
+ SDLoc DL(N);
+
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT OpVT = Op->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+
+ // Round promoted float to desired precision
+ SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op);
+ // Promote it back to the legal output type
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
+ LoadSDNode *L = cast<LoadSDNode>(N);
+ EVT VT = N->getValueType(0);
+
+ // Load the value as an integer value with the same number of bits.
+ EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ auto MMOFlags =
+ L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
+ SDLoc(N), L->getChain(), L->getBasePtr(),
+ L->getOffset(), L->getPointerInfo(), IVT,
+ L->getAlignment(), MMOFlags, L->getAAInfo());
+ // Legalize the chain result by replacing uses of the old value chain with the
+ // new one
+ ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
+
+ // Convert the integer value to the desired FP type
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL);
+}
+
+// Construct a new SELECT node with the promoted true- and false- values.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(2));
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0),
+ N->getOperand(0), TrueVal, FalseVal);
+}
+
+// Construct a new SELECT_CC node with the promoted true- and false- values.
+// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
+ SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
+ SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
+
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1), TrueVal, FalseVal,
+ N->getOperand(4));
+}
+
+ // Construct an SDNode that converts the SINT or UINT operand to the promoted
+ // float type.
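+ // E.g. (f16 (sint_to_fp i32:%x)) is converted straight to f32 and then sent
+ // through an FP_ROUND to f16 and an FP_EXTEND back to f32, so the extra
+ // precision of the wider type does not change the rounded result.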
+SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0));
+ // Round the value to the desired precision (that of the source type).
+ return DAG.getNode(
+ ISD::FP_EXTEND, DL, NVT,
+ DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 000000000000..3ab9459c8af7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,3391 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type. For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type. For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to promote this operator!");
+ case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
+ case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+ case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
+ case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+ case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
+ case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
+ case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
+ case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
+ case ISD::SMIN:
+ case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::UMIN:
+ case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
+ case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SIGN_EXTEND_INREG:
+ Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+ case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
+ case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
+ case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
+ case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
+
+ case ISD::EXTRACT_SUBVECTOR:
+ Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::BUILD_VECTOR:
+ Res = PromoteIntRes_BUILD_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS:
+ Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+ case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+ case ISD::SDIV:
+ case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+
+ case ISD::UDIV:
+ case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+ case ISD::UADDO:
+ case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+ case ISD::SMULO:
+ case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+ case ISD::ATOMIC_LOAD:
+ Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
+ break;
+ }
+
+ // If the result is null then the sub-method took care of registering it.
+ if (Res.getNode())
+ SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetPromotedInteger(Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+ // Sign-extend the new bits, and continue the assertion.
+ SDValue Op = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertSext, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) {
+ // Zero the new bits, and continue the assertion.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::AssertZext, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
+ EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
+ N->getMemoryVT(), ResVT,
+ N->getChain(), N->getBasePtr(),
+ N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
+ N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(),
+ Op2, N->getMemOperand(), N->getOrdering(),
+ N->getSynchScope());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
+ unsigned ResNo) {
+ if (ResNo == 1) {
+ assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
+ N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
+ N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(),
+ N->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res.getValue(1);
+ }
+
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ SDValue Op3 = GetPromotedInteger(N->getOperand(3));
+ SDVTList VTs =
+ DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
+ N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
+ N->getFailureOrdering(), N->getSynchScope());
+ // Update the use to N with the newly created Res.
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
+ ReplaceValueWith(SDValue(N, i), Res.getValue(i));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
+ // The input promotes to the same size. Convert the promoted value.
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ // Promote the integer operand by hand.
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+ case TargetLowering::TypePromoteFloat: {
+ // Convert the promoted float by hand.
+ SDValue PromotedOp = GetPromotedFloat(InOp);
+ return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
+ break;
+ }
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element to an integer and promote it by hand.
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
+ case TargetLowering::TypeSplitVector: {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ case TargetLowering::TypeWidenVector:
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ }
+
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ CreateStackStoreLoad(InOp, OutVT));
+}
+
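+ // The promoted input holds the original value in its low bits, so a BSWAP in
+ // the wider type leaves those bytes in the high part; shift them back down.
+ // E.g. bswap of an i16 promoted to i32 is an i32 BSWAP followed by a logical
+ // shift right of 16.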
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(
+ ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+}
+
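+ // Same trick as BSWAP above: bit-reversing in the wider type leaves the
+ // original bits in the high part, so shift right by the number of bits added
+ // by promotion.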
+SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+ return DAG.getNode(
+ ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
+ DAG.getConstant(DiffBits, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout())));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+ // The pair element type may be legal, or may not promote to the same type as
+ // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N),
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ // FIXME there is no actual debug info here
+ SDLoc dl(N);
+ // Zero extend things like i1, sign extend everything else. It shouldn't
+ // matter in theory which one we pick, but this tends to give better code?
+ unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue(N, 0));
+ assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+ "can only promote integers");
+ EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
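+ // The leading-zero count in the wider type includes the bits added by
+ // promotion, so subtract them off. E.g. for an i8 promoted to i32, ctlz is
+ // computed as ctlz32(zext x) - 24.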
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ SDLoc dl(N);
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ // Subtract off the extra leading bits in the bigger type.
+ return DAG.getNode(
+ ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
+ NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = Op.getValueType();
+ SDLoc dl(N);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
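+ // E.g. for an i8 promoted to i32, OR in 0x100 so that a cttz of an original
+ // zero yields 8, matching the i8 semantics.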
+ auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
+ OVT.getScalarSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NewOpc = N->getOpcode();
+ SDLoc dl(N);
+
+ // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
+ // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
+ if (N->getOpcode() == ISD::FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ NewOpc = ISD::FP_TO_SINT;
+
+ SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Res = GetPromotedInteger(N->getOperand(0));
+ assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
+
+ // If the result and operand types are the same after promotion, simplify
+ // to an in-register extension.
+ if (NVT == Res.getValueType()) {
+ // The high bits are not guaranteed to be anything. Insert an extend.
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(N->getOperand(0).getValueType()));
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
+ assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
+ return Res;
+ }
+ }
+
+ // Otherwise, just extend the original operand all the way to the larger type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ISD::LoadExtType ExtType =
+ ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
+ SDLoc dl(N);
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getMemoryVT(), N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
+
+ SDLoc dl(N);
+ SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
+ N->getMask(), ExtSrc0, N->getMemoryVT(),
+ N->getMemOperand(), ISD::SEXTLOAD);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue ExtSrc0 = GetPromotedInteger(N->getValue());
+ assert(NVT == ExtSrc0.getValueType() &&
+ "Gather result type and the passThru argument type should be the same");
+
+ SDLoc dl(N);
+ SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(),
+ N->getIndex()};
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+/// Promote the overflow flag of an overflowing arithmetic node.
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
+ // Simply change the return type of the boolean result.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT ValueVTs[] = { N->getValueType(0), NVT };
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+ DAG.getVTList(ValueVTs), Ops);
+
+ // Modified the sum result - switch anything that used the old sum to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 0), Res);
+
+ return SDValue(Res.getNode(), 1);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // sign extension of its truncation to the original type.
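+ // E.g. for an i8 saddo promoted to i32: add the sign-extended operands in
+ // i32, sign_extend_inreg the sum from i8, and report overflow iff that
+ // differs from the i32 sum.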
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ SDLoc dl(N);
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: sign extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
+ DAG.getValueType(OVT));
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, OpTy);
+ SDValue LHS = GetPromotedInteger(N->getOperand(1));
+ SDValue RHS = GetPromotedInteger(N->getOperand(2));
+ return DAG.getNode(ISD::VSELECT, SDLoc(N),
+ LHS.getValueType(), Mask, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetPromotedInteger(N->getOperand(2));
+ SDValue RHS = GetPromotedInteger(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ LHS.getValueType(), N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
+ EVT SVT = getSetCCResultType(N->getOperand(0).getValueType());
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ SDLoc dl(N);
+ assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ "Vector compare must return a vector result!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getValueType() != RHS.getValueType()) {
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger &&
+ !LHS.getValueType().isVector())
+ LHS = GetPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger &&
+ !RHS.getValueType().isVector())
+ RHS = GetPromotedInteger(RHS);
+ }
+
+ // Get the SETCC result using the canonical SETCC type.
+ SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS,
+ N->getOperand(2));
+
+ assert(NVT.bitsLE(SVT) && "Integer type overpromoted?");
+ // Convert to the expected type.
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ LHS = GetPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+ // The input may have strange things in the top bits of the registers, but
+ // these operations don't care. They may have weird bits going out, but
+ // that too is okay if they are integer operations.
+ SDValue LHS = GetPromotedInteger(N->getOperand(0));
+ SDValue RHS = GetPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ // The input value must be properly sign extended.
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ LHS = SExtPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ // The input value must be properly zero extended.
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ LHS = ZExtPromotedInteger(LHS);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+ return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InOp.getValueType())) {
+ default: llvm_unreachable("Unknown type action!");
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypeExpandInteger:
+ Res = InOp;
+ break;
+ case TargetLowering::TypePromoteInteger:
+ Res = GetPromotedInteger(InOp);
+ break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ SDValue EOp1, EOp2;
+ GetSplitVector(InOp, EOp1, EOp2);
+
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
+ }
+
+ // Truncate to NVT instead of VT
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ // The operation overflowed iff the result in the larger type is not the
+ // zero extension of its truncation to the original type.
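+ // E.g. for an i8 uaddo promoted to i32: add the zero-extended operands in
+ // i32, mask the sum back to its low 8 bits, and report overflow iff that
+ // differs from the i32 sum.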
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = LHS.getValueType();
+ SDLoc dl(N);
+
+ // Do the arithmetic in the larger type.
+ unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
+ SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS);
+
+ // Calculate the overflow flag: zero extend the arithmetic result from
+ // the original type.
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ // Overflowed if and only if this is not equal to Res.
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ SDLoc DL(N);
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the
+ // input to the larger type, do the multiply (checking if it overflows),
+ // then also check the high bits of the result to see if overflow happened
+ // there.
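+ // E.g. for an i8 umulo promoted to i32: multiply the zero-extended operands
+ // with an i32 UMULO, then flag overflow if bits [31:8] of the product are
+ // non-zero or if the i32 multiply itself overflowed.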
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1));
+ SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS);
+
+ // Overflow occurred if it occurred in the larger type, or if the high part
+ // of the result does not zero/sign-extend the low part. Check this second
+ // possibility first.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred if the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits(),
+ DL));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, DL, Hi.getValueType()),
+ ISD::SETNE);
+ } else {
+ // Signed overflow occurred if the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // The only other way for overflow to occur is if the multiplication in the
+ // larger type itself overflowed.
+ Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow,
+ SDValue(Mul.getNode(), 1));
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+ SDValue Chain = N->getOperand(0); // Get the chain.
+ SDValue Ptr = N->getOperand(1); // Get the pointer.
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+ // The argument is passed as NumRegs registers of type RegVT.
+
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
+ Chain = Parts[i].getValue(1);
+ }
+
+ // Handle endianness of the load.
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Parts.begin(), Parts.end());
+
+ // Assemble the parts in the promoted type.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+ for (unsigned i = 1; i < NumRegs; ++i) {
+ SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+ // Shift it to the right position and "or" it in.
+ Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+ DAG.getConstant(i * RegVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+ }
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to promote this operator's operand!");
+
+ case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
+ case ISD::ATOMIC_STORE:
+ Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N));
+ break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
+ case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
+ case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
+ case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONVERT_RNDSAT:
+ Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+ case ISD::INSERT_VECTOR_ELT:
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
+ case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+ OpNo); break;
+ case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
+ OpNo); break;
+ case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
+ OpNo); break;
+ case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
+ OpNo); break;
+ case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
+ OpNo); break;
+ case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
+ case ISD::FP16_TO_FP:
+ case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison. This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+ ISD::CondCode CCCode) {
+ // We have to insert explicit sign or zero extends. Note that we could
+ // insert sign extends for ALL conditions, but zero extend is cheaper on
+ // many machines (an AND instead of two shifts), so prefer it.
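+ // E.g. an i8 unsigned compare promoted to i32 compares the zero-extended
+ // operands, a signed compare uses the sign-extended ones, and an equality
+ // compare accepts either (zero extension is preferred unless both promoted
+ // operands are already AssertSext of the original type).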
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer comparison!");
+ case ISD::SETEQ:
+ case ISD::SETNE: {
+ SDValue OpL = GetPromotedInteger(NewLHS);
+ SDValue OpR = GetPromotedInteger(NewRHS);
+
+ // We would prefer to promote the comparison operands with sign extension
+ // if we find that an operand is really the truncation of an AssertSext. With
+ // this optimization we can avoid inserting a real truncate instruction,
+ // which would eventually be redundant.
+ if (OpL->getOpcode() == ISD::AssertSext &&
+ cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
+ OpR->getOpcode() == ISD::AssertSext &&
+ cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+ NewLHS = OpL;
+ NewRHS = OpR;
+ } else {
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ }
+ break;
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT:
+ case ISD::SETULE:
+ case ISD::SETULT:
+ // ALL of these operations will work if we either sign or zero extend
+ // the operands (including the unsigned comparisons!). Zero extend is
+ // usually a simpler/cheaper operation, so prefer it.
+ NewLHS = ZExtPromotedInteger(NewLHS);
+ NewRHS = ZExtPromotedInteger(NewRHS);
+ break;
+ case ISD::SETGE:
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ NewLHS = SExtPromotedInteger(NewLHS);
+ NewRHS = SExtPromotedInteger(NewRHS);
+ break;
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+ SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+ return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
+ N->getOrdering(), N->getSynchScope());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+ // This should only occur in unusual situations like bitcasting to an
+ // x86_fp80, so just turn it into a store+load
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+ // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+ // legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "only know how to promote condition");
+
+ // Promote all the way up to the canonical SetCC type.
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
+
+ // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+ // Since the result type is legal, the operands must promote to it.
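+ // E.g. (i16 (build_pair i8:%lo, i8:%hi)) with i8 promoted to i16 becomes
+ // (zext %lo) | ((promoted %hi) << 8); the shift discards whatever the
+ // promotion left in %hi's high bits.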
+ EVT OVT = N->getOperand(0).getValueType();
+ SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+ SDValue Hi = GetPromotedInteger(N->getOperand(1));
+ assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+ SDLoc dl(N);
+
+ Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+ DAG.getConstant(OVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type is not. This implies
+ // that the vector is a power-of-two in length and that the element
+ // type does not have a strange size (e.g. it is not i1).
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+ "Legal vector of one illegal element?");
+
+ // Promote the inserted value. The type does not need to match the
+ // vector element type. Check that any extra bits introduced will be
+ // truncated away.
+ assert(N->getOperand(0).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+
+ SmallVector<SDValue, 16> NewOps;
+ for (unsigned i = 0; i < NumElts; ++i)
+ NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+ CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+ CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+ "can only promote integer arguments");
+ SDValue InOp = GetPromotedInteger(N->getOperand(0));
+ return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp,
+ N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+ unsigned OpNo) {
+ if (OpNo == 1) {
+ // Promote the inserted value. This is valid because the type does not
+ // have to match the vector element type.
+
+ // Check that any extra bits introduced will be truncated away.
+ assert(N->getOperand(1).getValueType().getSizeInBits() >=
+ N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ "Type of inserted value narrower than vector element type!");
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ GetPromotedInteger(N->getOperand(1)),
+ N->getOperand(2)),
+ 0);
+ }
+
+ assert(OpNo == 2 && "Different operand and result vector types?");
+
+ // Promote the index.
+ SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+ // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
+ // the operand in place.
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Only know how to promote the condition!");
+ SDValue Cond = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
+ Cond = PromoteTargetBoolean(Cond, OpVT);
+
+ return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
+ N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+ // The CC (#4) and the possible return values (#2 and #3) have legal types.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+ // The CC (#2) is always legal.
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ SDLoc dl(N);
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+ Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+ SDLoc dl(N);
+
+ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
+
+ // Truncate the value and store the result.
+ return DAG.getTruncStore(Ch, dl, Val, Ptr,
+ N->getMemoryVT(), N->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+
+ SDValue DataOp = N->getValue();
+ EVT DataVT = DataOp.getValueType();
+ SDValue Mask = N->getMask();
+ SDLoc dl(N);
+
+ bool TruncateStore = false;
+ if (OpNo == 2) {
+ // The mask comes before the data operand. If the data operand is legal, we
+ // just promote the mask.
+ // When the data operand has an illegal type, we should legalize the data
+ // operand first. The mask will then be promoted/split/widened according to
+ // the data operand's type.
+ if (TLI.isTypeLegal(DataVT))
+ Mask = PromoteTargetBoolean(Mask, DataVT);
+ else {
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+ return PromoteIntOp_MSTORE(N, 3);
+
+ else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+ return WidenVecOp_MSTORE(N, 3);
+
+ else {
+ assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+ return SplitVecOp_MSTORE(N, 3);
+ }
+ }
+ } else { // Data operand
+ assert(OpNo == 3 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ TruncateStore = true;
+ }
+
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
+ N->getMemoryVT(), N->getMemOperand(),
+ TruncateStore);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ EVT DataVT = N->getValueType(0);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
+ unsigned OpNo) {
+
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValueType(0);
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ if (OpNo == 2) {
+ // The Mask
+ EVT DataVT = N->getValue().getValueType();
+ NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else
+ NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion. At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Lo, Hi;
+ Lo = Hi = SDValue();
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to expand the result of this operator!");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+ case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+ case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
+
+ case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+ case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break;
+ case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
+ case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
+ case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
+ case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+ case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
+ case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+ case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break;
+ case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+ case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break;
+ case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
+ case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue Tmp = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
+ N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(),
+ AN->getSynchScope());
+
+ // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
+ // success simply by comparing the loaded value against the ingoing
+ // comparison.
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
+ N->getOperand(2), ISD::SETEQ);
+
+ SplitInteger(Tmp, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Success);
+ ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
+ break;
+ }
+
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+ case ISD::UMAX:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break;
+
+ case ISD::ADD:
+ case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+ case ISD::ADDC:
+ case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+ case ISD::ADDE:
+ case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
+/// N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'. Expand the operation.
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+ // Expand the incoming operand to be shifted, so that we have its parts
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization
+ // split a vector shift, like this: <op1, op2> SHL <0, 2>.
+ if (!Amt) {
+ Lo = InL;
+ Hi = InH;
+ return;
+ }
+
+ EVT NVT = InL.getValueType();
+ unsigned VTBits = N->getValueType(0).getSizeInBits();
+ unsigned NVTBits = NVT.getSizeInBits();
+ EVT ShTy = N->getOperand(1).getValueType();
+
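+ // Worked example (illustrative, for an i64 shift expanded into two i32
+ // parts, i.e. VTBits = 64 and NVTBits = 32): SHL by 40 gives Lo = 0 and
+ // Hi = InL << 8; SHL by 32 gives Lo = 0 and Hi = InL; SHL by 12 gives
+ // Lo = InL << 12 and Hi = (InH << 12) | (InL >> 20).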
+ if (N->getOpcode() == ISD::SHL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = DAG.getConstant(0, DL, NVT);
+ Hi = InL;
+ } else {
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ }
+ return;
+ }
+
+ if (N->getOpcode() == ISD::SRL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getNode(ISD::SRL, DL,
+ NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getConstant(0, DL, NVT);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getConstant(0, DL, NVT);
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ }
+ return;
+ }
+
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ if (Amt.ugt(VTBits)) {
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(Amt - NVTBits, DL, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
+ DAG.getConstant(NVTBits - 1, DL, ShTy));
+ } else {
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
+ DAG.getConstant(Amt, DL, ShTy)),
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
+ DAG.getConstant(-Amt + NVTBits, DL, ShTy)));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy));
+ }
+}
+
+/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify
+/// this shift based on knowledge of the high bit of the shift amount. If we
+/// can tell this, we know that it is >= 32 or < 32, without knowing the actual
+/// shift amount.
+bool DAGTypeLegalizer::
+ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ SDLoc dl(N);
+
+ APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne);
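+ // E.g. with NVTBits = 32, HighBitMask covers bits 5 and up of the shift
+ // amount: if any of them is known one the amount is >= 32; if all of them
+ // are known zero it is < 32.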
+
+ // If we don't know anything about the high bits, exit.
+ if (((KnownZero|KnownOne) & HighBitMask) == 0)
+ return false;
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ // If we know that any of the high bits of the shift amount are one, then we
+ // can do this as a couple of simple shifts.
+ if (KnownOne.intersects(HighBitMask)) {
+ // Mask out the high bit, which we know is set.
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt,
+ DAG.getConstant(~HighBitMask, dl, ShTy));
+
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ Lo = DAG.getConstant(0, dl, NVT); // Low part is zero.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
+ return true;
+ case ISD::SRL:
+ Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero.
+ Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ case ISD::SRA:
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+ Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
+ return true;
+ }
+ }
+
+ // If we know that all of the high bits of the shift amount are zero, then we
+ // can do this as a couple of simple shifts.
+ if ((KnownZero & HighBitMask) == HighBitMask) {
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+
+ unsigned Op1, Op2;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break;
+ case ISD::SRL:
+ case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
+ }
+
+ // When shifting right the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
+ return true;
+ }
+
+ return false;
+}
+
+/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift
+/// of any size.
+bool DAGTypeLegalizer::
+ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDValue Amt = N->getOperand(1);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ShTy = Amt.getValueType();
+ unsigned NVTBits = NVT.getSizeInBits();
+ assert(isPowerOf2_32(NVTBits) &&
+ "Expanded integer type size not a power of two!");
+ SDLoc dl(N);
+
+ // Get the incoming operand to be shifted.
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+
+ SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
+ SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
+ Amt, DAG.getConstant(0, dl, ShTy),
+ ISD::SETEQ);
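+ // Both the "short" (Amt < NVTBits) and "long" (Amt >= NVTBits) forms are
+ // computed and selected between. The extra isZero select guards the
+ // Amt == 0 case, where AmtLack would equal NVTBits and the cross shift
+ // would be out of range.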
+
+ SDValue LoS, HiS, LoL, HiL;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unknown shift");
+ case ISD::SHL:
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
+
+ Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getSelect(dl, NVT, isZero, InH,
+ DAG.getSelect(dl, NVT, isShort, HiS, HiL));
+ return true;
+ case ISD::SRL:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
+ Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ return true;
+ case ISD::SRA:
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits - 1, dl, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getSelect(dl, NVT, isZero, InL,
+ DAG.getSelect(dl, NVT, isShort, LoS, LoL));
+ Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
+ return true;
+ }
+}
+
+static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
+
+ switch (Op) {
+ default: llvm_unreachable("invalid min/max opcode");
+ case ISD::SMAX:
+ return std::make_pair(ISD::SETGT, ISD::UMAX);
+ case ISD::UMAX:
+ return std::make_pair(ISD::SETUGT, ISD::UMAX);
+ case ISD::SMIN:
+ return std::make_pair(ISD::SETLT, ISD::UMIN);
+ case ISD::UMIN:
+ return std::make_pair(ISD::SETULT, ISD::UMIN);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc DL(N);
+ ISD::NodeType LoOpc;
+ ISD::CondCode CondC;
+ std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
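+ // E.g. for SMAX the Hi parts are compared with SETGT, while the Lo parts
+ // (which only matter when the Hi parts are equal) are combined with the
+ // unsigned UMAX.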
+
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ // Value types
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ // Hi part is always the same op
+ Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH});
+
+ // We need to know whether to select the Lo part that corresponds to the
+ // 'winning' Hi part, or to handle the case where the Hi parts are equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+ SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+
+ // Lo part corresponding to the 'winning' Hi part
+ SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+ // Lo part to use if the Hi parts are equal; this uses the unsigned version.
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
+
+ Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+
+ EVT NVT = LHSL.getValueType();
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
+ // them. TODO: Teach operation legalization how to expand unsupported
+ // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
+ // generate a value of this type in the expanded code sequence.
+ bool hasCarry =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::ADDC : ISD::SUBC,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+
+ if (hasCarry) {
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+ }
+ return;
+ }
+
+ bool hasOVF =
+ TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
+ ISD::UADDO : ISD::USUBO,
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (hasOVF) {
+ SDVTList VTList = DAG.getVTList(NVT, NVT);
+ TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT);
+ int RevOpc;
+ if (N->getOpcode() == ISD::ADD) {
+ RevOpc = ISD::SUB;
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ } else {
+ RevOpc = ISD::ADD;
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ }
+ SDValue OVF = Lo.getValue(1);
+
+ switch (BoolType) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF);
+ // Fallthrough
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF);
+ }
+ return;
+ }
+
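+ // Neither ADDC/ADDE nor UADDO/USUBO is available, so compute the carry or
+ // borrow explicitly: for an add, the low result wrapped iff it is
+ // unsigned-less-than either low operand; for a sub, a borrow is needed iff
+ // the first low operand is unsigned-less-than the second.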
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
+ SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2,
+ DAG.getConstant(1, dl, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
+ } else {
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
+ SDValue Cmp =
+ DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getSelect(dl, NVT, Cmp,
+ DAG.getConstant(1, dl, NVT),
+ DAG.getConstant(0, dl, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[2] = { LHSL, RHSL };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ if (N->getOpcode() == ISD::ADDC) {
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
+ } else {
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
+ }
+
+ // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
+ SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
+ SDValue HiOps[3] = { LHSH, RHSH };
+
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
+ HiOps[2] = Lo.getValue(1);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
+
+ // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is any extension of the input (which degenerates to a copy).
+ Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+ Hi = DAG.getUNDEF(NVT); // The high part is undefined.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part replicates the sign bit of Lo, make it explicit.
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NVTBits - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ unsigned NVTBits = NVT.getSizeInBits();
+ unsigned EVTBits = EVT.getSizeInBits();
+
+ if (NVTBits < EVTBits) {
+ Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ EVTBits - NVTBits)));
+ } else {
+ Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+ // The high part must be zero, make it explicit.
+ Hi = DAG.getConstant(0, dl, NVT);
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands.
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+ Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+ auto Constant = cast<ConstantSDNode>(N);
+ const APInt &Cst = Constant->getAPIntValue();
+ bool IsTarget = Constant->isTargetOpcode();
+ bool IsOpaque = Constant->isOpaque();
+ SDLoc dl(N);
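+ // E.g. an i64 constant 0x123456789ABCDEF0 expanded into two i32 halves
+ // yields Lo = 0x9ABCDEF0 and Hi = 0x12345678.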
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget,
+ IsOpaque);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
+
+ Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+ DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
+
+ Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ,
+ DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ NVT)));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ if (ISD::isNormalLoad(N)) {
+ ExpandRes_NormalLoad(N, Lo, Hi);
+ return;
+ }
+
+ assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ unsigned Alignment = N->getAlignment();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDLoc dl(N);
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ EVT MemVT = N->getMemoryVT();
+
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT,
+ Alignment, MMOFlags, AAInfo);
+
+ // Remember the chain.
+ Ch = Lo.getValue(1);
+
+ if (ExtType == ISD::SEXTLOAD) {
+ // The high part is obtained by SRA'ing all but one of the bits of the
+ // lo part.
+ unsigned LoSize = Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ // The high part is just a zero.
+ Hi = DAG.getConstant(0, dl, NVT);
+ } else {
+ assert(ExtType == ISD::EXTLOAD && "Unknown extload!");
+ // The high part is undefined.
+ Hi = DAG.getUNDEF(NVT);
+ }
+ } else if (DAG.getDataLayout().isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+ } else {
+ // Big-endian - high bits are at low addresses. Favor aligned loads at
+ // the cost of some bit-fiddling.
+ EVT MemVT = N->getMemoryVT();
+ unsigned EBytes = MemVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
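+ // Illustrative case: an extending load of an i48 memory type expanded to
+ // two i32 parts has EBytes = 6, IncrementSize = 4 and ExcessBits = 16, so
+ // the first (aligned) load reads the high 32 bits of the value and the
+ // second zero-extends the remaining 16 low bits; the bit-fiddling below
+ // then moves the pieces into their proper halves.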
+
+ // Load both the high bits and maybe some of the low bits.
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ MemVT.getSizeInBits() - ExcessBits),
+ Alignment, MMOFlags, AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ // Load the rest of the low bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer low bits from the bottom of Hi to the top of Lo.
+ Lo = DAG.getNode(
+ ISD::OR, dl, NVT, Lo,
+ DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
+ // Move high bits to the right position in Hi.
+ Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT,
+ Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ }
+ }
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+
+ if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH))
+ return;
+
+ // If nothing else, we can make a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL) {
+ // We'll expand the multiplication by brute force because we have no other
+ // options. This is a trivially-generalized version of the code from
+ // Hacker's Delight (itself derived from Knuth's Algorithm M from section
+ // 4.3.1).
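+ // Sketch of the decomposition below, assuming NVT = i32 (so Mask = 0xFFFF
+ // and Shift = 16): each 32-bit half is split into 16-bit pieces, the
+ // partial products are accumulated, and the carries out of the low half
+ // are added into the high half.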
+ SDValue Mask =
+ DAG.getConstant(APInt::getLowBitsSet(NVT.getSizeInBits(),
+ NVT.getSizeInBits() >> 1), dl, NVT);
+ SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask);
+ SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask);
+
+ SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
+ SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
+
+ SDValue Shift =
+ DAG.getConstant(NVT.getSizeInBits() >> 1, dl,
+ TLI.getShiftAmountTy(NVT, DAG.getDataLayout()));
+ SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
+ SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
+ SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
+
+ SDValue U = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TL);
+ SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask);
+ SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift);
+
+ SDValue V = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL);
+ SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift);
+
+ SDValue W = DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, LL, RL),
+ DAG.getNode(ISD::ADD, dl, NVT, UH, VH));
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, TH,
+ DAG.getNode(ISD::SHL, dl, NVT, V, Shift));
+
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, W,
+ DAG.getNode(ISD::ADD, dl, NVT,
+ DAG.getNode(ISD::MUL, dl, NVT, RH, LL),
+ DAG.getNode(ISD::MUL, dl, NVT, RL, LH)));
+ return;
+ }
+
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
+ SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0));
+ Lo = R.getValue(0);
+ Hi = R.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), R.getValue(2));
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDLoc dl(Node);
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // If we can emit an efficient shift operation, do so now. Check to see if
+ // the RHS is a constant.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi);
+
+ // If we can determine that the high bit of the shift is zero or one, even if
+ // the low bits are variable, emit this shift in an optimized form.
+ if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+ return;
+
+ // If this target supports shift_PARTS, use it. First, map to the _PARTS opc.
+ unsigned PartsOpc;
+ if (N->getOpcode() == ISD::SHL) {
+ PartsOpc = ISD::SHL_PARTS;
+ } else if (N->getOpcode() == ISD::SRL) {
+ PartsOpc = ISD::SRL_PARTS;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ PartsOpc = ISD::SRA_PARTS;
+ }
+
+ // Next check to see if the target supports this SHL_PARTS operation or if it
+ // will custom expand it.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+ if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH;
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ EVT VT = LHSL.getValueType();
+
+ // If the shift amount operand is coming from a vector legalization it may
+ // have an illegal type. Fix that first by casting the operand, otherwise
+ // the new SHL_PARTS operation would need further legalization.
+ SDValue ShiftOp = N->getOperand(1);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ assert(ShiftTy.getScalarType().getSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
+ "ShiftAmountTy is too small to cover the range of this type!");
+ if (ShiftOp.getValueType() != ShiftTy)
+ ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
+
+ SDValue Ops[] = { LHSL, LHSH, ShiftOp };
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops);
+ Hi = Lo.getValue(1);
+ return;
+ }
+
+ // Otherwise, emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ bool isSigned;
+ if (N->getOpcode() == ISD::SHL) {
+ isSigned = false; /*sign irrelevant*/
+ if (VT == MVT::i16)
+ LC = RTLIB::SHL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SHL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SHL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SHL_I128;
+ } else if (N->getOpcode() == ISD::SRL) {
+ isSigned = false;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRL_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRL_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRL_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRL_I128;
+ } else {
+ assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+ isSigned = true;
+ if (VT == MVT::i16)
+ LC = RTLIB::SRA_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SRA_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SRA_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SRA_I128;
+ }
+
+ if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
+ return;
+ }
+
+ if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+ llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is sign extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+ // The high part is obtained by SRA'ing all but one of the bits of the low part.
+ unsigned LoSize = NVT.getSizeInBits();
+ Hi = DAG.getNode(
+ ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+ if (EVT.bitsLE(Lo.getValueType())) {
+ // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo,
+ N->getOperand(1));
+
+ // The high part gets the sign extension from the lo-part. This handles
+ // things like sextinreg V:i64 from i8.
+ Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
+ DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ } else {
+ // For example, extension of an i48 to an i64. Leave the low part alone,
+ // sext_inreg the high part.
+ unsigned ExcessBits =
+ EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
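+ // E.g. a truncate to an i64 result that must itself be expanded into two
+ // i32 parts: Lo is the plain truncate of the operand and Hi is the
+ // truncate of the operand shifted right by 32.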
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0));
+ Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(),
+ N->getOperand(0),
+ DAG.getConstant(NVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDLoc dl(N);
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode () == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+
+ SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
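+ // In effect the check below computes: overflow iff (LHS * RHS) / RHS !=
+ // LHS, with the RHS == 0 case handled separately since multiplying by
+ // zero never overflows.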
+ // Select a non-zero divisor so the UDIV below is well defined even when
+ // RHS is zero.
+ SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ);
+ SDValue NotZero = DAG.getSelect(dl, VT, isZero,
+ DAG.getConstant(1, dl, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero);
+ SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+ ISD::SETNE);
+ Overflow = DAG.getSelect(dl, N->getValueType(1), isZero,
+ DAG.getConstant(0, dl, N->getValueType(1)),
+ Overflow);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Temporary for the overflow value, default it to zero.
+ SDValue Chain =
+ DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp,
+ MachinePointerInfo());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : N->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow check.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setSExtResult();
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 =
+ DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo());
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, dl, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(0), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
+
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+
+ if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) {
+ SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops);
+ SplitInteger(Res.getValue(1), Lo, Hi);
+ return;
+ }
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i16)
+ LC = RTLIB::UREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::UREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::UREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::UREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
+
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ SDValue Op = N->getOperand(0);
+ if (Op.getValueType().bitsLE(NVT)) {
+ // The low part is zero extension of the input (degenerates to a copy).
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0));
+ Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero.
+ } else {
+ // For example, extension of an i48 to an i64. The operand type necessarily
+ // promotes to the result type, so will end up being expanded too.
+ assert(getTypeAction(Op.getValueType()) ==
+ TargetLowering::TypePromoteInteger &&
+ "Only know how to promote this result!");
+ SDValue Res = GetPromotedInteger(Op);
+ assert(Res.getValueType() == N->getValueType(0) &&
+ "Operand over promoted?");
+ // Split the promoted operand. This will simplify when it is expanded.
+ SplitInteger(Res, Lo, Hi);
+ unsigned ExcessBits =
+ Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ Hi = DAG.getZeroExtendInReg(Hi, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
+ }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
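+ // Lower the atomic load as a compare-and-swap of Zero against Zero: memory
+ // is never modified and the current value comes back as result 0.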
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+}
+
+//===----------------------------------------------------------------------===//
+// Integer Operand Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need expansion. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+ #ifndef NDEBUG
+ dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
+ llvm_unreachable("Do not know how to expand this operator's operand!");
+
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
+ case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
+ case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
+ case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
+ case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
+ case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break;
+ case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
+ case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+ case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
+
+ case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break;
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+ SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl) {
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+ GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+ if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+ if (RHSLo == RHSHi) {
+ if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+ if (RHSCST->isAllOnesValue()) {
+ // Equality comparison to -1.
+ NewLHS = DAG.getNode(ISD::AND, dl,
+ LHSLo.getValueType(), LHSLo, LHSHi);
+ NewRHS = RHSLo;
+ return;
+ }
+ }
+ }
+
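+ // Otherwise lower the wide equality test as
+ // ((LHSLo ^ RHSLo) | (LHSHi ^ RHSHi)) compared against zero.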
+ NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+ NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+ NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+ NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType());
+ return;
+ }
+
+ // If this is a comparison of the sign bit, just look at the top part.
+ // X > -1, X < 0
+ if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+ if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ NewLHS = LHSHi;
+ NewRHS = RHSHi;
+ return;
+ }
+
+ // FIXME: This generated code sucks.
+ ISD::CondCode LowCC;
+ switch (CCCode) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETLT:
+ case ISD::SETULT: LowCC = ISD::SETULT; break;
+ case ISD::SETGT:
+ case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+ case ISD::SETLE:
+ case ISD::SETULE: LowCC = ISD::SETULE; break;
+ case ISD::SETGE:
+ case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+ }
+
+ // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
+ // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
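+ // The low halves act as unsigned digits of the wide value; the sign lives
+ // in the high half, so signedness only affects the high-part comparison.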
+
+ // NOTE: on targets without efficient SELECT of bools, we can always use
+ // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
+ nullptr);
+ SDValue Tmp1, Tmp2;
+ if (TLI.isTypeLegal(LHSLo.getValueType()) &&
+ TLI.isTypeLegal(RHSLo.getValueType()))
+ Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!Tmp1.getNode())
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSLo, RHSLo, LowCC);
+ if (TLI.isTypeLegal(LHSHi.getValueType()) &&
+ TLI.isTypeLegal(RHSHi.getValueType()))
+ Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!Tmp2.getNode())
+ Tmp2 = DAG.getNode(ISD::SETCC, dl,
+ getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+ ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+ if ((Tmp1C && Tmp1C->isNullValue()) ||
+ (Tmp2C && Tmp2C->isNullValue() &&
+ (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+ (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+ (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+ CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+ // If the low part is known false, return the high part.
+ // For LE / GE, if the high part is known false, ignore the low part.
+ // For LT / GT, if the high part is known true, ignore the low part.
+ NewLHS = Tmp2;
+ NewRHS = SDValue();
+ return;
+ }
+
+ if (LHSHi == RHSHi) {
+ // Comparing the low bits is enough.
+ NewLHS = Tmp1;
+ NewRHS = SDValue();
+ return;
+ }
+
+ // Lower with SETCCE if the target supports it.
+ // FIXME: Make all targets support this, then remove the other lowering.
+ if (TLI.getOperationAction(
+ ISD::SETCCE,
+ TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) ==
+ TargetLowering::Custom) {
+ // SETCCE can detect < and >= directly. For > and <=, flip operands and
+ // condition code.
+ bool FlipOperands = false;
+ switch (CCCode) {
+ case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break;
+ case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break;
+ case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break;
+ case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break;
+ default: break;
+ }
+ if (FlipOperands) {
+ std::swap(LHSLo, RHSLo);
+ std::swap(LHSHi, RHSHi);
+ }
+ // Perform a wide subtraction, feeding the carry from the low part into
+ // SETCCE. The SETCCE operation is essentially looking at the high part of
+ // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or
+ // positive iff LHS >= RHS.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo);
+ SDValue Res =
+ DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()),
+ LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode));
+ NewLHS = Res;
+ NewRHS = SDValue();
+ return;
+ }
+
+ NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ, false,
+ DagCombineInfo, dl);
+ if (!NewLHS.getNode())
+ NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, ISD::SETEQ);
+ NewLHS = DAG.getSelect(dl, Tmp1.getValueType(),
+ NewLHS, Tmp1, Tmp2);
+ NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+ // result against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ DAG.getCondCode(CCCode), NewLHS, NewRHS,
+ N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If IntegerExpandSetCCOperands returned a scalar, we need to compare the
+ // result against zero to select between true and false values.
+ if (!NewRHS.getNode()) {
+ NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType());
+ CCCode = ISD::SETNE;
+ }
+
+ // Update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ N->getOperand(2), N->getOperand(3),
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+ SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+
+ // If IntegerExpandSetCCOperands returned a scalar, use it.
+ if (!NewRHS.getNode()) {
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ return NewLHS;
+ }
+
+ // Otherwise, update N to have the operands specified.
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Carry = N->getOperand(2);
+ SDValue Cond = N->getOperand(3);
+ SDLoc dl = SDLoc(N);
+
+ SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+ GetExpandedInteger(LHS, LHSLo, LHSHi);
+ GetExpandedInteger(RHS, RHSLo, RHSHi);
+
+ // Expand to a SUBE for the low part and a smaller SETCCE for the high.
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue);
+ SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry);
+ return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi,
+ LowCmp.getValue(1), Cond);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+ // The value being shifted is legal, but the shift amount is too big.
+ // It follows that either the result of the shift is undefined, or the
+ // upper half of the shift amount is zero. Just use the lower half.
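+ // For example, if an i64 shift amount is expanded on a 32-bit target, a
+ // non-zero high half implies an amount of at least 2^32, far larger than
+ // the bit width of any legal shifted type, so the shift result would be
+ // undefined in that case anyway.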
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(1), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+ // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant.
+ // This causes problems on 8/16-bit targets, so just truncate the constant
+ // to a valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT DstVT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this SINT_TO_FP!");
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (ISD::isNormalStore(N))
+ return ExpandOp_NormalStore(N, OpNo);
+
+ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+
+ EVT VT = N->getOperand(1).getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ unsigned Alignment = N->getAlignment();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDLoc dl(N);
+ SDValue Lo, Hi;
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ if (N->getMemoryVT().bitsLE(NVT)) {
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(), Alignment, MMOFlags, AAInfo);
+ }
+
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Little-endian - low bits are at low addresses.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ unsigned ExcessBits =
+ N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+ EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
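+ // The second store covers only the ExcessBits that did not fit in the first
+ // NVT-sized store; a truncating store of Hi writes just its low ExcessBits.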
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getTruncStore(
+ Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ }
+
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(
+ ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits, dl,
+ TLI.getPointerTy(DAG.getDataLayout()))));
+ }
+
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment,
+ MMOFlags, AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+ SDValue InL, InH;
+ GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Just truncate the low part of the source.
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DstVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DstVT. Check that the mantissa
+ // size of DstVT is at least the number of bits in SrcVT minus 1.
+ const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // Do a signed conversion then adjust the result.
+ SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+ SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+ // The result of the signed conversion needs adjusting if the 'sign bit' of
+ // the incoming integer was set. To handle this, we dynamically test to see
+ // if it is set, and, if so, add a fudge factor.
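+ // For example, an i64 source with its sign bit set is interpreted by the
+ // signed conversion as a value 2^64 too small; adding 2^64 as the fudge
+ // factor produces the intended unsigned result.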
+
+ const uint64_t F32TwoE32 = 0x4F800000ULL;
+ const uint64_t F32TwoE64 = 0x5F800000ULL;
+ const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+ APInt FF(32, 0);
+ if (SrcVT == MVT::i32)
+ FF = APInt(32, F32TwoE32);
+ else if (SrcVT == MVT::i64)
+ FF = APInt(32, F32TwoE64);
+ else if (SrcVT == MVT::i128)
+ FF = APInt(32, F32TwoE128);
+ else
+ llvm_unreachable("Unsupported UINT_TO_FP!");
+
+ // Check whether the sign bit is set.
+ SDValue Lo, Hi;
+ GetExpandedInteger(Op, Lo, Hi);
+ SDValue SignSet = DAG.getSetCC(dl,
+ getSetCCResultType(Hi.getValueType()),
+ Hi,
+ DAG.getConstant(0, dl, Hi.getValueType()),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr =
+ DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0, dl);
+ SDValue Four = DAG.getIntPtrConstant(4, dl);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Zero, Four);
+ SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+ FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(),
+ FudgePtr, Offset);
+ Alignment = std::min(Alignment, 4u);
+
+ // Load the value out, extending it from f32 to the destination float type.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ Alignment);
+ return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+ }
+
+ // Otherwise, use a libcall.
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Don't know how to expand this UINT_TO_FP!");
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(),
+ N->getOperand(0),
+ N->getOperand(1), N->getOperand(2),
+ cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+ return Swap.getValue(1);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp0 = N->getOperand(0);
+ EVT InVT = InOp0.getValueType();
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned OutNumElems = OutVT.getVectorNumElements();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue BaseIdx = N->getOperand(1);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
+ for (unsigned i = 0; i != OutNumElems; ++i) {
+
+ // Extract the element from the original vector.
+ SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(),
+ BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InVT.getVectorElementType(), N->getOperand(0), Index);
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ // Insert the converted element to the new vector.
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements());
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT OutVT = V0.getValueType();
+
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask);
+}
+
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumElems = N->getNumOperands();
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Op;
+ // BUILD_VECTOR integer operand types are allowed to be larger than the
+ // result's element type. This may still be true after the promotion. For
+ // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to
+ // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>.
+ if (N->getOperand(i).getValueType().bitsLT(NOutVTElem))
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
+ else
+ Op = N->getOperand(i);
+ Ops.push_back(Op);
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+
+ SDLoc dl(N);
+
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
+
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
+ SDLoc dl(N);
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT InElemTy = OutVT.getVectorElementType();
+ EVT OutElemTy = NOutVT.getVectorElementType();
+
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
+ unsigned NumOutElem = NOutVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
+
+ // Take the elements from the first vector.
+ SmallVector<SDValue, 8> Ops(NumOutElem);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+ EVT NOutVTElem = NOutVT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+
+ SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
+ NOutVTElem, N->getOperand(1));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT,
+ V0, ConvElem, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ V0->getValueType(0).getScalarType(), V0, V1);
+
+ // EXTRACT_VECTOR_ELT can return types which are wider than the incoming
+ // element types. If this is the case then we need to expand the outgoing
+ // value and not truncate it.
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ MVT InVT = V0.getValueType().getSimpleVT();
+ MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
+ SDLoc dl(N);
+ unsigned NumElems = N->getNumOperands();
+
+ EVT RetSclrTy = N->getValueType(0).getVectorElementType();
+
+ SmallVector<SDValue, 8> NewOps;
+ NewOps.reserve(NumElems);
+
+ // For each incoming vector
+ for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) {
+ SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx));
+ EVT SclrTy = Incoming->getValueType(0).getVectorElementType();
+ unsigned NumElem = Incoming->getValueType(0).getVectorNumElements();
+
+ for (unsigned i=0; i<NumElem; ++i) {
+ // Extract element from incoming vector
+ SDValue Ex = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex);
+ NewOps.push_back(Tr);
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 000000000000..144bed241ee7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1177 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method. It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types. This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// Do extensive, expensive sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+ // If a node is not processed, then none of its values should be mapped by any
+ // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+
+ // If a node is processed, then each value with an illegal type must be mapped
+ // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+ // Values with a legal type may be mapped by ReplacedValues, but not by any of
+ // the other maps.
+
+ // Note that these invariants may not hold momentarily when processing a node:
+ // the node being processed may be put in a map before being marked Processed.
+
+ // Note that it is possible to have nodes marked NewNode in the DAG. This can
+ // occur in two ways. Firstly, a node may be created during legalization but
+ // never passed to the legalization core. This is usually due to the implicit
+ // folding that occurs when using the DAG.getNode operators. Secondly, a new
+ // node may be passed to the legalization core, but when analyzed may morph
+ // into a different node, leaving the original node as a NewNode in the DAG.
+ // A node may morph if one of its operands changes during analysis. Whether
+ // it actually morphs or not depends on whether, after updating its operands,
+ // it is equivalent to an existing node: if so, it morphs into that existing
+ // node (CSE). An operand can change during analysis if the operand is a new
+ // node that morphs, or it is a processed value that was mapped to some other
+ // value (as recorded in ReplacedValues) in which case the operand is turned
+ // into that other value. If a node morphs then the node it morphed into will
+ // be used instead of it for legalization, however the original node continues
+ // to live on in the DAG.
+ // The conclusion is that though there may be nodes marked NewNode in the DAG,
+ // all uses of such nodes are also marked NewNode: the result is a fungus of
+ // NewNodes growing on top of the useful nodes, and perhaps using them, but
+ // not used by them.
+
+ // If a value is mapped by ReplacedValues, then it must have no uses, except
+ // by nodes marked NewNode (see above).
+
+ // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+ // Note that ReplacedValues should be applied iteratively.
+
+ // Note that the ReplacedValues map may also map deleted nodes (by iterating
+ // over the DAG we never dereference deleted nodes). This means that it may
+ // also map nodes marked NewNode if the deallocated memory was reallocated as
+ // another node, and that new node was not seen by the LegalizeTypes machinery
+ // (for example because it was created but not used). In general, we cannot
+ // distinguish between new nodes and deleted nodes.
+ SmallVector<SDNode*, 16> NewNodes;
+ for (SDNode &Node : DAG.allnodes()) {
+ // Remember nodes marked NewNode - they are subject to extra checking below.
+ if (Node.getNodeId() == NewNode)
+ NewNodes.push_back(&Node);
+
+ for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
+ SDValue Res(&Node, i);
+ bool Failed = false;
+
+ unsigned Mapped = 0;
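+ // Mapped collects one bit per transformation map that contains this value;
+ // the bit assignments match the names printed in the failure dump below.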
+ if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ SDValue NewVal = ReplacedValues[Res];
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+ while (I != ReplacedValues.end()) {
+ NewVal = I->second;
+ I = ReplacedValues.find(NewVal);
+ }
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
+ }
+ if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+ Mapped |= 2;
+ if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+ Mapped |= 4;
+ if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+ Mapped |= 8;
+ if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+ Mapped |= 16;
+ if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+ Mapped |= 32;
+ if (SplitVectors.find(Res) != SplitVectors.end())
+ Mapped |= 64;
+ if (WidenedVectors.find(Res) != WidenedVectors.end())
+ Mapped |= 128;
+
+ if (Node.getNodeId() != Processed) {
+ // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+ // marked NewNode too, since a deleted node may have been reallocated as
+ // another node that has not been seen by the LegalizeTypes machinery.
+ if ((Node.getNodeId() == NewNode && Mapped > 1) ||
+ (Node.getNodeId() != NewNode && Mapped != 0)) {
+ dbgs() << "Unprocessed value in a map!";
+ Failed = true;
+ }
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
+ if (Mapped > 1) {
+ dbgs() << "Value with legal type was transformed!";
+ Failed = true;
+ }
+ } else {
+ if (Mapped == 0) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ } else if (Mapped & (Mapped - 1)) {
+ dbgs() << "Value in multiple maps!";
+ Failed = true;
+ }
+ }
+
+ if (Failed) {
+ if (Mapped & 1)
+ dbgs() << " ReplacedValues";
+ if (Mapped & 2)
+ dbgs() << " PromotedIntegers";
+ if (Mapped & 4)
+ dbgs() << " SoftenedFloats";
+ if (Mapped & 8)
+ dbgs() << " ScalarizedVectors";
+ if (Mapped & 16)
+ dbgs() << " ExpandedIntegers";
+ if (Mapped & 32)
+ dbgs() << " ExpandedFloats";
+ if (Mapped & 64)
+ dbgs() << " SplitVectors";
+ if (Mapped & 128)
+ dbgs() << " WidenedVectors";
+ dbgs() << "\n";
+ llvm_unreachable(nullptr);
+ }
+ }
+ }
+
+ // Check that NewNodes are only used by other NewNodes.
+ for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+ SDNode *N = NewNodes[i];
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI)
+ assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+ }
+}
+
+/// This is the main entry point for the type legalizer. This does a top-down
+/// traversal of the dag, legalizing types as it goes. Returns "true" if it made
+/// any changes.
+bool DAGTypeLegalizer::run() {
+ bool Changed = false;
+
+ // Create a dummy node (which is not added to allnodes), that adds a reference
+ // to the root node, preventing it from being deleted, and tracking any
+ // changes of the root.
+ HandleSDNode Dummy(DAG.getRoot());
+ Dummy.setNodeId(Unanalyzed);
+
+ // The root of the dag may dangle to deleted nodes until the type legalizer is
+ // done. Set it to null to avoid confusion.
+ DAG.setRoot(SDValue());
+
+ // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+ // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+ // non-leaves.
+ for (SDNode &Node : DAG.allnodes()) {
+ if (Node.getNumOperands() == 0) {
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
+ } else {
+ Node.setNodeId(Unanalyzed);
+ }
+ }
+
+ // Now that we have a set of nodes to process, handle them all.
+ while (!Worklist.empty()) {
+#ifndef EXPENSIVE_CHECKS
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ SDNode *N = Worklist.back();
+ Worklist.pop_back();
+ assert(N->getNodeId() == ReadyToProcess &&
+ "Node should be ready if on worklist!");
+
+ if (IgnoreNodeResults(N))
+ goto ScanOperands;
+
+ // Scan the values produced by the node, checking to see if any result
+ // types are illegal.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
+ EVT ResultVT = N->getValueType(i);
+ switch (getTypeAction(ResultVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ // The following calls must take care of *all* of the node's results,
+ // not just the illegal result they were passed (this includes results
+ // with a legal type). Results can be remapped using ReplaceValueWith,
+ // or their promoted/expanded/etc values registered in PromotedIntegers,
+ // ExpandedIntegers etc.
+ case TargetLowering::TypePromoteInteger:
+ PromoteIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeExpandInteger:
+ ExpandIntegerResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSoftenFloat:
+ Changed = SoftenFloatResult(N, i);
+ if (Changed)
+ goto NodeDone;
+ // If not changed, the result type should already be legal in a register.
+ assert(isLegalInHWReg(ResultVT) &&
+ "Unchanged SoftenFloatResult should be legal in register!");
+ goto ScanOperands;
+ case TargetLowering::TypeExpandFloat:
+ ExpandFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeScalarizeVector:
+ ScalarizeVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeSplitVector:
+ SplitVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypeWidenVector:
+ WidenVectorResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ case TargetLowering::TypePromoteFloat:
+ PromoteFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
+ }
+ }
+
+ScanOperands:
+ // Scan the operand list for the node, handling any nodes with operands that
+ // are illegal.
+ {
+ unsigned NumOperands = N->getNumOperands();
+ bool NeedsReanalyzing = false;
+ unsigned i;
+ for (i = 0; i != NumOperands; ++i) {
+ if (IgnoreNodeResults(N->getOperand(i).getNode()))
+ continue;
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ switch (getTypeAction(OpVT)) {
+ case TargetLowering::TypeLegal:
+ continue;
+ // The following calls must either replace all of the node's results
+ // using ReplaceValueWith, and return "false"; or update the node's
+ // operands in place, and return "true".
+ case TargetLowering::TypePromoteInteger:
+ NeedsReanalyzing = PromoteIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandInteger:
+ NeedsReanalyzing = ExpandIntegerOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ NeedsReanalyzing = SoftenFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeExpandFloat:
+ NeedsReanalyzing = ExpandFloatOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeScalarizeVector:
+ NeedsReanalyzing = ScalarizeVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeSplitVector:
+ NeedsReanalyzing = SplitVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypeWidenVector:
+ NeedsReanalyzing = WidenVectorOperand(N, i);
+ Changed = true;
+ break;
+ case TargetLowering::TypePromoteFloat:
+ NeedsReanalyzing = PromoteFloatOperand(N, i);
+ Changed = true;
+ break;
+ }
+ break;
+ }
+
+ // The sub-method updated N in place. Check to see if any operands are new,
+ // and if so, mark them. If the node needs revisiting, don't add its users
+ // to the worklist yet.
+ if (NeedsReanalyzing) {
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(NewNode);
+ // Recompute the NodeId and correct processed operands, adding the node to
+ // the worklist if ready.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M == N)
+ // The node didn't morph - nothing special to do, it will be revisited.
+ continue;
+
+ // The node morphed - this is equivalent to legalizing by replacing every
+ // value of N with the corresponding value of M. So do that now.
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ // Replacing the value takes care of remapping the new value.
+ ReplaceValueWith(SDValue(N, i), SDValue(M, i));
+ assert(N->getNodeId() == NewNode && "Unexpected node state!");
+ // The node continues to live on as part of the NewNode fungus that
+ // grows on top of the useful nodes. Nothing more needs to be done
+ // with it - move on to the next node.
+ continue;
+ }
+
+ if (i == NumOperands) {
+ DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
+ }
+ }
+NodeDone:
+
+ // If we reach here, the node was processed, potentially creating new nodes.
+ // Mark it as processed and add its users to the worklist as appropriate.
+ assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+ N->setNodeId(Processed);
+
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI) {
+ SDNode *User = *UI;
+ int NodeId = User->getNodeId();
+
+ // This node has two options: it can either be a new node or its Node ID
+ // may be a count of the number of operands it has that are not ready.
+ if (NodeId > 0) {
+ User->setNodeId(NodeId-1);
+
+ // If this was the last use it was waiting on, add it to the ready list.
+ if (NodeId-1 == ReadyToProcess)
+ Worklist.push_back(User);
+ continue;
+ }
+
+ // If this is an unreachable new node, then ignore it. If it ever becomes
+ // reachable by being used by a newly created node then it will be handled
+ // by AnalyzeNewNode.
+ if (NodeId == NewNode)
+ continue;
+
+ // Otherwise, this node is new: this is the first operand of it that
+ // became ready. Its new NodeId is the number of operands it has minus 1
+ // (as this node is now processed).
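+ // For example, a user with three operands whose first operand just became
+ // ready gets NodeId 2; each further operand that finishes processing
+ // decrements the count, and the user is queued once it reaches
+ // ReadyToProcess.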
+ assert(NodeId == Unanalyzed && "Unknown node ID!");
+ User->setNodeId(User->getNumOperands() - 1);
+
+ // If the node only has a single operand, it is now ready.
+ if (User->getNumOperands() == 1)
+ Worklist.push_back(User);
+ }
+ }
+
+#ifndef EXPENSIVE_CHECKS
+ if (EnableExpensiveChecks)
+#endif
+ PerformExpensiveChecks();
+
+ // If the root changed (e.g. it was a dead load) update the root.
+ DAG.setRoot(Dummy.getValue());
+
+ // Remove dead nodes. This is important to do for cleanliness but also before
+ // the checking loop below. Implicit folding by the DAG.getNode operators and
+ // node morphing can leave unreachable nodes in the DAG with their NodeId
+ // still set to NewNode.
+ DAG.RemoveDeadNodes();
+
+ // In a debug build, scan all the nodes to make sure we found them all. This
+ // ensures that there are no cycles and that everything got processed.
+#ifndef NDEBUG
+ for (SDNode &Node : DAG.allnodes()) {
+ bool Failed = false;
+
+ // Check that all result types are legal.
+ // A value type is illegal if its TypeAction is not TypeLegal,
+ // and TLI.RegClassForVT does not have a register class for this type.
+ // For example, on x86_64 f128 is not TypeLegal (so its operators are
+ // softened), but the target has an FR128 register class used to pass and
+ // return f128 values. Hence a legalized node can have f128 type.
+ if (!IgnoreNodeResults(&Node))
+ for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
+ if (!isTypeLegal(Node.getValueType(i)) &&
+ !TLI.isTypeLegal(Node.getValueType(i))) {
+ dbgs() << "Result type " << i << " illegal: ";
+ Node.dump();
+ Failed = true;
+ }
+
+ // Check that all operand types are legal.
+ for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
+ if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
+ !isTypeLegal(Node.getOperand(i).getValueType()) &&
+ !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ dbgs() << "Operand type " << i << " illegal: ";
+ Node.getOperand(i).dump();
+ Failed = true;
+ }
+
+ if (Node.getNodeId() != Processed) {
+ if (Node.getNodeId() == NewNode)
+ dbgs() << "New node not analyzed?\n";
+ else if (Node.getNodeId() == Unanalyzed)
+ dbgs() << "Unanalyzed node not noticed?\n";
+ else if (Node.getNodeId() > 0)
+ dbgs() << "Operand not processed?\n";
+ else if (Node.getNodeId() == ReadyToProcess)
+ dbgs() << "Not added to worklist?\n";
+ Failed = true;
+ }
+
+ if (Failed) {
+ Node.dump(&DAG); dbgs() << "\n";
+ llvm_unreachable(nullptr);
+ }
+ }
+#endif
+
+ return Changed;
+}
+
+/// The specified node is the root of a subtree of potentially new nodes.
+/// Correct any processed operands (this may change the node) and calculate the
+/// NodeId. If the node itself changes to a processed node, it is not remapped -
+/// the caller needs to take care of this. Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+ // If this was an existing node that is already done, we're done.
+ if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+ return N;
+
+ // Remove any stale map entries.
+ ExpungeNode(N);
+
+ // Okay, we know that this node is new. Recursively walk all of its operands
+ // to see if they are new also. The depth of this walk is bounded by the size
+ // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+ // about revisiting nodes.
+ //
+ // As we walk the operands, keep track of the number of nodes that are
+ // processed. If non-zero, this will become the new nodeid of this node.
+ // Operands may morph when they are analyzed. If so, the node will be
+ // updated after all operands have been analyzed. Since this is rare,
+ // the code tries to minimize overhead in the non-morphing case.
+
+ SmallVector<SDValue, 8> NewOps;
+ unsigned NumProcessed = 0;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue OrigOp = N->getOperand(i);
+ SDValue Op = OrigOp;
+
+ AnalyzeNewValue(Op); // Op may morph.
+
+ if (Op.getNode()->getNodeId() == Processed)
+ ++NumProcessed;
+
+ if (!NewOps.empty()) {
+ // Some previous operand changed. Add this one to the list.
+ NewOps.push_back(Op);
+ } else if (Op != OrigOp) {
+ // This is the first operand to change - add all operands so far.
+ NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.push_back(Op);
+ }
+ }
+
+ // Some operands changed - update the node.
+ if (!NewOps.empty()) {
+ SDNode *M = DAG.UpdateNodeOperands(N, NewOps);
+ if (M != N) {
+ // The node morphed into a different node. Normally for this to happen
+ // the original node would have to be marked NewNode. However this can
+ // in theory momentarily not be the case while ReplaceValueWith is running.
+ // Mark the original node NewNode to help sanity checking.
+ N->setNodeId(NewNode);
+ if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+ // It morphed into a previously analyzed node - nothing more to do.
+ return M;
+
+ // It morphed into a different new node. Do the equivalent of passing
+ // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need
+ // to remap the operands, since they are the same as the operands we
+ // remapped above.
+ N = M;
+ ExpungeNode(N);
+ }
+ }
+
+ // Calculate the NodeId.
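+ // The NodeId is the number of operands that have not yet been processed;
+ // when none remain the node is ready and is added to the worklist below.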
+ N->setNodeId(N->getNumOperands() - NumProcessed);
+ if (N->getNodeId() == ReadyToProcess)
+ Worklist.push_back(N);
+
+ return N;
+}
+
+/// Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+ Val.setNode(AnalyzeNewNode(Val.getNode()));
+ if (Val.getNode()->getNodeId() == Processed)
+ // We were passed a processed node, or it morphed into one - remap it.
+ RemapValue(Val);
+}
+
+/// If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings. In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+ if (N->getNodeId() != NewNode)
+ return;
+
+ // If N is not remapped by ReplacedValues then there is nothing to do.
+ unsigned i, e;
+ for (i = 0, e = N->getNumValues(); i != e; ++i)
+ if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+ break;
+
+ if (i == e)
+ return;
+
+ // Remove N from all maps - this is expensive but rare.
+
+ for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+ E = PromotedIntegers.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+ E = SoftenedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+ E = ScalarizedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+ E = WidenedVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+ I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+ assert(I->first.getNode() != N);
+ RemapValue(I->second.first);
+ RemapValue(I->second.second);
+ }
+
+ for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+ E = ReplacedValues.end(); I != E; ++I)
+ RemapValue(I->second);
+
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ ReplacedValues.erase(SDValue(N, i));
+}
+
+/// If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+ DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+ if (I != ReplacedValues.end()) {
+ // Use path compression to speed up future lookups if values are replaced
+ // multiple times with other values.
+ RemapValue(I->second);
+ N = I->second;
+
+ // Note that it is possible to have N.getNode()->getNodeId() == NewNode at
+ // this point because it is possible for a node to be put in the map before
+ // being processed.
+ }
+}
+
+namespace {
+ /// This class is a DAGUpdateListener that listens for updates to nodes and
+ /// recomputes their ready state.
+ class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+ DAGTypeLegalizer &DTL;
+ SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+ public:
+ explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+ SmallSetVector<SDNode*, 16> &nta)
+ : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+ DTL(dtl), NodesToAnalyze(nta) {}
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ // It is possible, though rare, for the deleted node N to occur as a
+ // target in a map, so note the replacement N -> E in ReplacedValues.
+ assert(E && "Node not replaced?");
+ DTL.NoteDeletion(N, E);
+
+ // In theory the deleted node could also have been scheduled for analysis.
+ // So remove it from the set of nodes which will be analyzed.
+ NodesToAnalyze.remove(N);
+
+ // In general nothing needs to be done for E, since it didn't change but
+ // only gained new uses. However N -> E was just added to ReplacedValues,
+ // and the result of a ReplacedValues mapping is not allowed to be marked
+ // NewNode. So if E is marked NewNode, then it needs to be analyzed.
+ if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+ NodesToAnalyze.insert(E);
+ }
+
+ void NodeUpdated(SDNode *N) override {
+ // Node updates can mean pretty much anything. It is possible that an
+ // operand was set to something already processed, for example, in which case
+ // this node could become ready. Recompute its flags.
+ assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+ N->getNodeId() != DAGTypeLegalizer::Processed &&
+ "Invalid node ID for RAUW deletion!");
+ N->setNodeId(DAGTypeLegalizer::NewNode);
+ NodesToAnalyze.insert(N);
+ }
+ };
+}
+
+
+/// The specified value was legalized to the specified other value.
+/// Update the DAG and NodeIds replacing any uses of From to use To instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+ assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+ // If expansion produced new nodes, make sure they are properly marked.
+ ExpungeNode(From.getNode());
+ AnalyzeNewValue(To); // Expunges To.
+
+ // Anything that used the old node should now use the new one. Note that this
+ // can potentially cause recursive merging.
+ SmallSetVector<SDNode*, 16> NodesToAnalyze;
+ NodeUpdateListener NUL(*this, NodesToAnalyze);
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
+
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
+ }
+ }
+ // When recursively updating nodes with new nodes, it is possible for From
+ // to gain new uses due to CSE. If this happens, replace those new uses of
+ // From with To as well.
+ } while (!From.use_empty());
+}
+
+void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted integer");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedIntegers[Op];
+ assert(!OpEntry.getNode() && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
+ // On x86_64, f128 can be kept in SSE registers,
+ // but is sometimes softened to i128.
+ assert((Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ "Invalid type for softened float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = SoftenedFloats[Op];
+ // Allow repeated calls for f128 nodes, or for any node whose type
+ // transforms to itself, since many operations on these types are not
+ // softened.
+ assert((!OpEntry.getNode()||
+ Op.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ "Node is already converted to integer!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for promoted float");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = PromotedFloats[Op];
+ assert(!OpEntry.getNode() && "Node is already promoted!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
+ // Note that in some cases vector operation operands may be wider than the
+ // vector element type. For example, a BUILD_VECTOR of type <1 x i1> may
+ // have a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
+ "Invalid type for scalarized vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = ScalarizedVectors[Op];
+ assert(!OpEntry.getNode() && "Node is already scalarized!");
+ OpEntry = Result;
+}
+
+void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded integer");
+ // Lo/Hi may have been newly allocated; if so, assign NodeIds as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+ assert(!Entry.first.getNode() && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't expanded");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for expanded float");
+ // Lo/Hi may have been newly allocated; if so, assign NodeIds as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+ assert(!Entry.first.getNode() && "Node already expanded");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+ SDValue &Hi) {
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ RemapValue(Entry.first);
+ RemapValue(Entry.second);
+ assert(Entry.first.getNode() && "Operand isn't split");
+ Lo = Entry.first;
+ Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+ SDValue Hi) {
+ assert(Lo.getValueType().getVectorElementType() ==
+ Op.getValueType().getVectorElementType() &&
+ 2*Lo.getValueType().getVectorNumElements() ==
+ Op.getValueType().getVectorNumElements() &&
+ Hi.getValueType() == Lo.getValueType() &&
+ "Invalid type for split vector");
+ // Lo/Hi may have been newly allocated; if so, assign NodeIds as needed.
+ AnalyzeNewValue(Lo);
+ AnalyzeNewValue(Hi);
+
+ // Remember that this is the result of the node.
+ std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+ assert(!Entry.first.getNode() && "Node already split");
+ Entry.first = Lo;
+ Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+ "Invalid type for widened vector");
+ AnalyzeNewValue(Result);
+
+ SDValue &OpEntry = WidenedVectors[Op];
+ assert(!OpEntry.getNode() && "Node already widened!");
+ OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+ unsigned BitWidth = Op.getValueType().getSizeInBits();
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+ EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// Convert to a vector of integers of the same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Only applies to vectors!");
+ unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op),
+ EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+ EVT DestVT) {
+ SDLoc dl(Op);
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and destination types.
+ SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+ // Emit a store to the stack slot.
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo());
+ // Result is a load from the stack slot.
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
+}
+
+/// Replace the node's results with custom code provided by the target and
+/// return "true", or do nothing and return "false".
+/// If LegalizeResult is false, the node has legal result types but an illegal
+/// operand, and VT is the type of that illegal operand.
+/// If LegalizeResult is true, the node has illegal result types, and VT is
+/// the type of the illegal result.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ if (LegalizeResult)
+ TLI.ReplaceNodeResults(N, Results, DAG);
+ else
+ TLI.LowerOperationWrapper(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom lower it after all.
+ return false;
+
+ // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to
+ // provide the same kind of custom splitting behavior.
+ if (Results.size() == N->getNumValues() + 1 && LegalizeResult) {
+ // We've legalized a return type by splitting it. If there is a chain,
+ // replace that too.
+ SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]);
+ if (N->getNumValues() > 1)
+ ReplaceValueWith(SDValue(N, 1), Results[2]);
+ return true;
+ }
+
+ // Make everything that once used N's values now use those in Results instead.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ }
+ return true;
+}
+
+
+/// Widen the node's results with custom code provided by the target and return
+/// "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+ // See if the target wants to custom lower this node.
+ if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+ return false;
+
+ SmallVector<SDValue, 8> Results;
+ TLI.ReplaceNodeResults(N, Results, DAG);
+
+ if (Results.empty())
+ // The target didn't want to custom widen its result after all.
+ return false;
+
+ // Update the widening map.
+ assert(Results.size() == N->getNumValues() &&
+ "Custom lowering returned the wrong number of results!");
+ for (unsigned i = 0, e = Results.size(); i != e; ++i)
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ return true;
+}
+
+SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+ if (i != ResNo)
+ ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+ return SDValue(N->getOperand(ResNo));
+}
+
+/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+/// given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(Pair);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(0, dl));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+ DAG.getIntPtrConstant(1, dl));
+}
+
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+ SDValue Index) {
+ SDLoc dl(Index);
+ // Make sure the index type is big enough to compute in.
+ Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout()));
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltVT.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+ DAG.getConstant(EltSize, dl, Index.getValueType()));
+ return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
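+
+// The computation above is ordinary byte arithmetic: scale the index by the
+// element size and add it to the base pointer. A hedged standalone sketch,
+// not built; the helper name is ours, and a real implementation should use
+// the ABI store size rather than the bit width divided by eight:
+#if 0
+#include <cstdint>
+
+static char *VectorElementPointer(char *VecPtr, uint64_t EltSizeInBytes,
+ uint64_t Index) {
+ // Byte offset of element Index from the start of the vector, added to the
+ // base address.
+ return VecPtr + Index * EltSizeInBytes;
+}
+#endif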
+
+/// Build an integer with low bits Lo and high bits Hi.
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+ // Arbitrarily use dlHi for result SDLoc
+ SDLoc dlHi(Hi);
+ SDLoc dlLo(Lo);
+ EVT LVT = Lo.getValueType();
+ EVT HVT = Hi.getValueType();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ LVT.getSizeInBits() + HVT.getSizeInBits());
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+ DAG.getConstant(LVT.getSizeInBits(), dlHi,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
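+
+// At the bit level the sequence above is: zero-extend Lo, any-extend Hi,
+// shift Hi left by Lo's width, then OR the two. A standalone sketch for two
+// i32 halves (illustration only; the helper name is ours and the DAG version
+// handles arbitrary integer widths):
+#if 0
+#include <cstdint>
+
+static uint64_t JoinIntegers32(uint32_t Lo, uint32_t Hi) {
+ return (uint64_t)Lo | ((uint64_t)Hi << 32); // Hi occupies the upper bits
+}
+#endif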
+
+/// Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+ bool isSigned) {
+ unsigned NumOps = N->getNumOperands();
+ SDLoc dl(N);
+ if (NumOps == 0) {
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
+ dl).first;
+ } else if (NumOps == 1) {
+ SDValue Op = N->getOperand(0);
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
+ dl).first;
+ } else if (NumOps == 2) {
+ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
+ dl).first;
+ }
+ SmallVector<SDValue, 8> Ops(NumOps);
+ for (unsigned i = 0; i < NumOps; ++i)
+ Ops[i] = N->getOperand(i);
+
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
+}
+
+/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
+/// the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+
+ return CallInfo;
+}
+
+/// Promote the given target boolean to a target boolean of the given type.
+/// A target boolean is an integer value, not necessarily of type i1, the bits
+/// of which conform to getBooleanContents.
+///
+/// ValVT is the type of values that produced the boolean.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
+ SDLoc dl(Bool);
+ EVT BoolVT = getSetCCResultType(ValVT);
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
+ return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
+}
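+
+// Which extension is used depends on how the target encodes booleans: 0/1
+// booleans are zero-extended, 0/-1 booleans are sign-extended, and targets
+// with undefined upper bits may use any-extend. A standalone sketch for an
+// i8 boolean promoted to i32 (the enum and helper name are ours):
+#if 0
+#include <cstdint>
+
+enum class BoolContents { ZeroOrOne, ZeroOrNegativeOne };
+
+static int32_t PromoteBool(int8_t B, BoolContents C) {
+ return C == BoolContents::ZeroOrOne
+ ? (int32_t)(uint8_t)B // zero-extend: 0x01 -> 0x00000001
+ : (int32_t)B; // sign-extend: 0xFF -> 0xFFFFFFFF
+}
+#endif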
+
+/// Widen the given target boolean to a target boolean of the given type.
+/// The boolean vector is widened and then promoted to match the target boolean
+/// type of the given ValVT.
+SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
+ bool WithZeroes) {
+ SDLoc dl(Bool);
+ EVT BoolVT = Bool.getValueType();
+
+ assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
+ TLI.isTypeLegal(ValVT) &&
+ "Unexpected types in WidenTargetBoolean");
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
+ ValVT.getVectorNumElements());
+ Bool = ModifyToType(Bool, WideVT, WithZeroes);
+ return PromoteTargetBoolean(Bool, ValVT);
+}
+
+/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(Op);
+ assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
+ Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+ DAG.getConstant(LoVT.getSizeInBits(), dl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
+}
+
+/// Return the lower and upper halves of Op's bits in a value type half the
+/// size of Op's.
+void DAGTypeLegalizer::SplitInteger(SDValue Op,
+ SDValue &Lo, SDValue &Hi) {
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+ Op.getValueType().getSizeInBits()/2);
+ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
+}
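+
+// Splitting is the inverse of JoinIntegers: truncate for the low half, shift
+// right by the low type's width and truncate for the high half. A standalone
+// sketch splitting an i64 into two i32's (the helper name is ours):
+#if 0
+#include <cstdint>
+
+static void SplitInteger64(uint64_t Op, uint32_t &Lo, uint32_t &Hi) {
+ Lo = (uint32_t)Op; // TRUNCATE
+ Hi = (uint32_t)(Op >> 32); // SRL by 32, then TRUNCATE
+}
+#endif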
+
+
+//===----------------------------------------------------------------------===//
+// Entry Point
+//===----------------------------------------------------------------------===//
+
+/// This transforms the SelectionDAG into a SelectionDAG that only uses types
+/// natively supported by the target. Returns "true" if it made any changes.
+///
+/// Note that this is an involved process that may invalidate pointers into
+/// the graph.
+bool SelectionDAG::LegalizeTypes() {
+ return DAGTypeLegalizer(*this).run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
new file mode 100644
index 000000000000..84ad8f83d906
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -0,0 +1,859 @@
+//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DAGTypeLegalizer class. This is a private interface
+// shared by the pieces of code that implement the SelectionDAG::LegalizeTypes
+// method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// This takes an arbitrary SelectionDAG as input and hacks on it until only
+/// value types the target machine can handle are left. This involves promoting
+/// small sizes to large sizes or splitting up large values into small values.
+///
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
+ const TargetLowering &TLI;
+ SelectionDAG &DAG;
+public:
+ /// This pass uses the NodeId on the SDNodes to hold information about the
+ /// state of each node. This enum lists the possible states.
+ enum NodeIdFlags {
+ /// All operands have been processed, so this node is ready to be handled.
+ ReadyToProcess = 0,
+
+ /// This is a new node, not before seen, that was created in the process of
+ /// legalizing some other node.
+ NewNode = -1,
+
+ /// This node's ID needs to be set to the number of its unprocessed
+ /// operands.
+ Unanalyzed = -2,
+
+ /// This is a node that has already been processed.
+ Processed = -3
+
+ // 1+ - This is a node which has this many unprocessed operands.
+ };
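+
+ // A sketch of how these ids drive the traversal, for intuition only (the
+ // actual bookkeeping in run() and AnalyzeNewNode is more involved); the
+ // helper name below is ours, not part of this class:
+#if 0
+ void NoteOperandProcessed(SDNode *User, SmallVectorImpl<SDNode *> &Worklist) {
+ int Id = User->getNodeId() - 1; // one fewer unprocessed operand
+ User->setNodeId(Id);
+ if (Id == ReadyToProcess) // all operands done: ready to be handled
+ Worklist.push_back(User);
+ }
+#endif
+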
+private:
+
+ /// This is a bitvector that contains two bits for each simple value type,
+ /// where the two bits correspond to the LegalizeAction enum from
+ /// TargetLowering. This can be queried with "getTypeAction(VT)".
+ TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+ /// Return how we should legalize values of this type.
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT);
+ }
+
+ /// Return true if this type is legal on this target.
+ bool isTypeLegal(EVT VT) const {
+ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
+ }
+
+ /// Return true if this is a simple legal type.
+ bool isSimpleLegalType(EVT VT) const {
+ return VT.isSimple() && TLI.isTypeLegal(VT);
+ }
+
+ /// Return true if this type can be passed in registers.
+ /// For example, x86_64's f128 should legally stay in registers, with only
+ /// some operations converted to library calls or integer bitwise
+ /// operations.
+ bool isLegalInHWReg(EVT VT) const {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ return VT == NVT && isSimpleLegalType(VT);
+ }
+
+ EVT getSetCCResultType(EVT VT) const {
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ }
+
+ /// Pretend all of this node's results are legal.
+ bool IgnoreNodeResults(SDNode *N) const {
+ return N->getOpcode() == ISD::TargetConstant;
+ }
+
+ /// For integer nodes that are below legal width, this map indicates what
+ /// promoted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;
+
+ /// For integer nodes that need to be expanded this map indicates which
+ /// operands are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;
+
+ /// For floating-point nodes converted to integers of the same size, this map
+ /// indicates the converted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
+
+ /// For floating-point nodes that have a smaller precision than the smallest
+ /// supported precision, this map indicates what promoted value to use.
+ SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
+
+ /// For float nodes that need to be expanded this map indicates which operands
+ /// are the expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
+
+ /// For nodes that are <1 x ty>, this map indicates the scalar value of type
+ /// 'ty' to use.
+ SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;
+
+ /// For nodes that need to be split this map indicates which operands are the
+ /// expanded version of the input.
+ SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;
+
+ /// For vector nodes that need to be widened, indicates the widened value to
+ /// use.
+ SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;
+
+ /// For values that have been replaced with another, indicates the replacement
+ /// value to use.
+ SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;
+
+ /// This defines a worklist of nodes to process. In order to be pushed onto
+ /// this worklist, all operands of a node must have already been processed.
+ SmallVector<SDNode*, 128> Worklist;
+
+public:
+ explicit DAGTypeLegalizer(SelectionDAG &dag)
+ : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+ ValueTypeActions(TLI.getValueTypeActions()) {
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
+ }
+
+ /// This is the main entry point for the type legalizer. This does a
+ /// top-down traversal of the dag, legalizing types as it goes. Returns
+ /// "true" if it made any changes.
+ bool run();
+
+ void NoteDeletion(SDNode *Old, SDNode *New) {
+ ExpungeNode(Old);
+ ExpungeNode(New);
+ for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+ ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+ }
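+
+ // Entries in ReplacedValues can chain (A replaced by B, B later by C), so a
+ // lookup follows the chain to the final value. A standalone sketch of that
+ // remapping idea using plain std::map (illustration only, not the real
+ // RemapValue; needs <map>):
+#if 0
+ static int Remap(const std::map<int, int> &Replaced, int V) {
+ // Follow replacements until reaching a value that was never replaced.
+ for (auto It = Replaced.find(V); It != Replaced.end(); It = Replaced.find(V))
+ V = It->second;
+ return V;
+ }
+#endif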
+
+ SelectionDAG &getDAG() const { return DAG; }
+
+private:
+ SDNode *AnalyzeNewNode(SDNode *N);
+ void AnalyzeNewValue(SDValue &Val);
+ void ExpungeNode(SDNode *N);
+ void PerformExpensiveChecks();
+ void RemapValue(SDValue &N);
+
+ // Common routines.
+ SDValue BitConvertToInteger(SDValue Op);
+ SDValue BitConvertVectorToIntegerVector(SDValue Op);
+ SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+ bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+ bool CustomWidenLowerNode(SDNode *N, EVT VT);
+
+ /// Replace each result of the given MERGE_VALUES node with the corresponding
+ /// input operand, except for the result 'ResNo', for which the corresponding
+ /// input operand is returned.
+ SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
+
+ SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+ SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+ SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+ /// Modify the given bit vector to match the SetCC result type of ValVT.
+ /// The bit vector is widened with zeroes when WithZeroes is true.
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
+
+ void ReplaceValueWith(SDValue From, SDValue To);
+ void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+ SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Promotion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was promoted to a larger integer type,
+ /// this returns the promoted value. The low bits of the promoted value
+ /// corresponding to the original type are exactly equal to Op.
+ /// The extra bits contain rubbish, so the promoted value may need to be zero-
+ /// or sign-extended from the original type before it is usable (the helpers
+ /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+ /// For example, if Op is an i16 and was promoted to an i32, then this method
+ /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+ /// 16 bits of which contain rubbish.
+ SDValue GetPromotedInteger(SDValue Op) {
+ SDValue &PromotedOp = PromotedIntegers[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedInteger(SDValue Op, SDValue Result);
+
+ /// Get a promoted operand and sign extend it to the final size.
+ SDValue SExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op,
+ DAG.getValueType(OldVT));
+ }
+
+ /// Get a promoted operand and zero extend it to the final size.
+ SDValue ZExtPromotedInteger(SDValue Op) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
+ }
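+
+ // Both helpers exist to clean up the "rubbish" upper bits of a promoted
+ // value: one by replicating the original sign bit, the other by masking.
+ // A standalone sketch for an i16 value living in a 32-bit register (the
+ // helper names are ours):
+#if 0
+ static uint32_t SExtInReg16(uint32_t Promoted) {
+ uint32_t Bits = Promoted & 0xFFFFu; // keep the original 16 bits
+ return (Bits ^ 0x8000u) - 0x8000u; // copy bit 15 into the upper bits
+ }
+ static uint32_t ZExtInReg16(uint32_t Promoted) {
+ return Promoted & 0xFFFFu; // clear the upper bits
+ }
+#endif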
+
+ // Integer Result Promotion.
+ void PromoteIntegerResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_AssertSext(SDNode *N);
+ SDValue PromoteIntRes_AssertZext(SDNode *N);
+ SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
+ SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
+ SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
+ SDValue PromoteIntRes_BSWAP(SDNode *N);
+ SDValue PromoteIntRes_BITREVERSE(SDNode *N);
+ SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntRes_Constant(SDNode *N);
+ SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntRes_CTLZ(SDNode *N);
+ SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTTZ(SDNode *N);
+ SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
+ SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
+ SDValue PromoteIntRes_LOAD(LoadSDNode *N);
+ SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
+ SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
+ SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SELECT(SDNode *N);
+ SDValue PromoteIntRes_VSELECT(SDNode *N);
+ SDValue PromoteIntRes_SELECT_CC(SDNode *N);
+ SDValue PromoteIntRes_SETCC(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N);
+ SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_TRUNCATE(SDNode *N);
+ SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UNDEF(SDNode *N);
+ SDValue PromoteIntRes_VAARG(SDNode *N);
+ SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+
+ // Integer Operand Promotion.
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
+ SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
+ SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N);
+ SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
+ SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_TRUNCATE(SDNode *N);
+ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+
+ void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
+
+ //===--------------------------------------------------------------------===//
+ // Integer Expansion Support: LegalizeIntegerTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was expanded into two integers of half
+ /// the size, this returns the two halves. The low bits of Op are exactly
+ /// equal to the bits of Lo; the high bits exactly equal Hi.
+ /// For example, if Op is an i64 which was expanded into two i32's, then this
+ /// method returns the two i32's, with Lo being equal to the lower 32 bits of
+ /// Op, and Hi being equal to the upper 32 bits.
+ void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Integer Result Expansion.
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo);
+ void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
+ SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+ bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Integer Operand Expansion.
+ bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandIntOp_BR_CC(SDNode *N);
+ SDValue ExpandIntOp_SELECT_CC(SDNode *N);
+ SDValue ExpandIntOp_SETCC(SDNode *N);
+ SDValue ExpandIntOp_SETCCE(SDNode *N);
+ SDValue ExpandIntOp_Shift(SDNode *N);
+ SDValue ExpandIntOp_SINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ExpandIntOp_TRUNCATE(SDNode *N);
+ SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
+ SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);
+
+ void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, const SDLoc &dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given an operand Op of float type, returns its softened integer form if
+ /// Op is not supported by the target HW and was converted to an integer.
+ /// The integer contains exactly the same bits as Op - only the type changed.
+ /// For example, if Op is an f32 which was softened to an i32, this method
+ /// returns an i32 whose bits coincide with those of Op.
+ /// If Op can be supported efficiently by the target HW, or the operand must
+ /// stay in a register, Op is not converted to an integer; in that case the
+ /// given Op is returned unchanged.
+ SDValue GetSoftenedFloat(SDValue Op) {
+ SDValue &SoftenedOp = SoftenedFloats[Op];
+ if (!SoftenedOp.getNode() &&
+ isSimpleLegalType(Op.getValueType()))
+ return Op;
+ RemapValue(SoftenedOp);
+ assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
+ return SoftenedOp;
+ }
+ void SetSoftenedFloat(SDValue Op, SDValue Result);
+
+ // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary.
+ void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
+ // When the result type can be kept in HW registers, the converted NewRes
+ // node could have the same type. By calling ReplaceValueWith here to update
+ // all users, we avoid having to clone every user of N in SoftenFloatOperand
+ // or other legalization functions.
+ if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
+ ReplaceValueWith(SDValue(N, ResNo), NewRes);
+ }
+
+ // Convert Float Results to Integer for Non-HW-supported Operations.
+ bool SoftenFloatResult(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FMINNUM(SDNode *N);
+ SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
+ SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCEIL(SDNode *N);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FCOS(SDNode *N);
+ SDValue SoftenFloatRes_FDIV(SDNode *N);
+ SDValue SoftenFloatRes_FEXP(SDNode *N);
+ SDValue SoftenFloatRes_FEXP2(SDNode *N);
+ SDValue SoftenFloatRes_FFLOOR(SDNode *N);
+ SDValue SoftenFloatRes_FLOG(SDNode *N);
+ SDValue SoftenFloatRes_FLOG2(SDNode *N);
+ SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
+ SDValue SoftenFloatRes_FMUL(SDNode *N);
+ SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
+ SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatRes_FPOW(SDNode *N);
+ SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_FREM(SDNode *N);
+ SDValue SoftenFloatRes_FRINT(SDNode *N);
+ SDValue SoftenFloatRes_FROUND(SDNode *N);
+ SDValue SoftenFloatRes_FSIN(SDNode *N);
+ SDValue SoftenFloatRes_FSQRT(SDNode *N);
+ SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTRUNC(SDNode *N);
+ SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_UNDEF(SDNode *N);
+ SDValue SoftenFloatRes_VAARG(SDNode *N);
+ SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+
+ // Return true if we can skip softening the given operand or SDNode because
+ // it was already softened by SoftenFloatResult and references to the operand
+ // were replaced by ReplaceValueWith.
+ bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
+
+ // Convert Float Operand to Integer for Non-HW-supported Operations.
+ bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
+ SDValue SoftenFloatOp_BR_CC(SDNode *N);
+ SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
+ SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
+ SDValue SoftenFloatOp_SETCC(SDNode *N);
+ SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Float Expansion Support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed operand Op which was expanded into two floating-point
+ /// values of half the size, this returns the two halves.
+ /// The low bits of Op are exactly equal to the bits of Lo; the high bits
+ /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
+ /// into two f64's, then this method returns the two f64's, with Lo being
+ /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits.
+ void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Float Result Expansion.
+ void ExpandFloatResult(SDNode *N, unsigned ResNo);
+ void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Float Operand Expansion.
+ bool ExpandFloatOperand(SDNode *N, unsigned OperandNo);
+ SDValue ExpandFloatOp_BR_CC(SDNode *N);
+ SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
+ SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
+ SDValue ExpandFloatOp_SETCC(SDNode *N);
+ SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
+
+ void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode, const SDLoc &dl);
+
+ //===--------------------------------------------------------------------===//
+ // Float promotion support: LegalizeFloatTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ SDValue GetPromotedFloat(SDValue Op) {
+ SDValue &PromotedOp = PromotedFloats[Op];
+ RemapValue(PromotedOp);
+ assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+ return PromotedOp;
+ }
+ void SetPromotedFloat(SDValue Op, SDValue Result);
+
+ void PromoteFloatResult(SDNode *N, unsigned ResNo);
+ SDValue PromoteFloatRes_BITCAST(SDNode *N);
+ SDValue PromoteFloatRes_BinOp(SDNode *N);
+ SDValue PromoteFloatRes_ConstantFP(SDNode *N);
+ SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
+ SDValue PromoteFloatRes_FMAD(SDNode *N);
+ SDValue PromoteFloatRes_FPOWI(SDNode *N);
+ SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+ SDValue PromoteFloatRes_LOAD(SDNode *N);
+ SDValue PromoteFloatRes_SELECT(SDNode *N);
+ SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
+ SDValue PromoteFloatRes_UnaryOp(SDNode *N);
+ SDValue PromoteFloatRes_UNDEF(SDNode *N);
+ SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+
+ bool PromoteFloatOperand(SDNode *N, unsigned ResNo);
+ SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Scalarization Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed one-element vector Op which was scalarized to its
+ /// element type, this returns the element. For example, if Op is a v1i32,
+ /// Op = < i32 val >, this method returns val, an i32.
+ SDValue GetScalarizedVector(SDValue Op) {
+ SDValue &ScalarizedOp = ScalarizedVectors[Op];
+ RemapValue(ScalarizedOp);
+ assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?");
+ return ScalarizedOp;
+ }
+ void SetScalarizedVector(SDValue Op, SDValue Result);
+
+ // Vector Result Scalarization: <1 x ty> -> ty.
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_BinOp(SDNode *N);
+ SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecRes_InregOp(SDNode *N);
+
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
+ SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
+ SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
+ SDValue ScalarizeVecRes_SETCC(SDNode *N);
+ SDValue ScalarizeVecRes_UNDEF(SDNode *N);
+ SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_VSETCC(SDNode *N);
+
+ // Vector Operand Scalarization: <1 x ty> -> ty.
+ bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_VSELECT(SDNode *N);
+ SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Splitting Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed vector Op which was split into vectors of half the size,
+ /// this method returns the halves. The first elements of Op coincide with the
+ /// elements of Lo; the remaining elements of Op coincide with the elements of
+ /// Hi: Op is what you would get by concatenating Lo and Hi.
+ /// For example, if Op is a v8i32 that was split into two v4i32's, then this
+ /// method returns the two v4i32's, with Lo corresponding to the first 4
+ /// elements of Op, and Hi to the last 4 elements.
+ void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
+ void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
+
+ // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
+ void SplitVectorResult(SDNode *N, unsigned OpNo);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
+ SDValue &Hi);
+
+ // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
+ bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_UnaryOp(SDNode *N);
+ SDValue SplitVecOp_TruncateHelper(SDNode *N);
+
+ SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_VSETCC(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
+ SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Given a processed vector Op which was widened into a larger vector, this
+ /// method returns the larger vector. The elements of the returned vector
+ /// consist of the elements of Op followed by elements containing rubbish.
+ /// For example, if Op is a v2i32 that was widened to a v4i32, then this
+ /// method returns a v4i32 for which the first two elements are the same as
+ /// those of Op, while the last two elements contain rubbish.
+ SDValue GetWidenedVector(SDValue Op) {
+ SDValue &WidenedOp = WidenedVectors[Op];
+ RemapValue(WidenedOp);
+ assert(WidenedOp.getNode() && "Operand wasn't widened?");
+ return WidenedOp;
+ }
+ void SetWidenedVector(SDValue Op, SDValue Result);
+
+ // Widen Vector Result Promotion.
+ void WidenVectorResult(SDNode *N, unsigned ResNo);
+ SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
+ SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+ SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+ SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+ SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
+ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_LOAD(SDNode* N);
+ SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
+ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
+ SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVecRes_SELECT_CC(SDNode* N);
+ SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_UNDEF(SDNode *N);
+ SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+ SDValue WidenVecRes_VSETCC(SDNode* N);
+
+ SDValue WidenVecRes_Ternary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_Shift(SDNode *N);
+ SDValue WidenVecRes_Unary(SDNode *N);
+ SDValue WidenVecRes_InregOp(SDNode *N);
+
+ // Widen Vector Operand.
+ bool WidenVectorOperand(SDNode *N, unsigned OpNo);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
+ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTEND(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_SETCC(SDNode* N);
+
+ SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+
+ //===--------------------------------------------------------------------===//
+ // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+ //===--------------------------------------------------------------------===//
+
+ /// Helper function to generate a set of loads to load a vector with a
+ /// resulting wider type. It takes:
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD);
+
+ /// Helper function to generate a set of extension loads to load a vector with
+ /// a resulting wider type. It takes:
+ /// LdChain: list of chains for the load to be generated.
+ /// Ld: load to widen
+ /// ExtType: extension element type
+ SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+ /// Helper function to generate a set of stores to store a widened vector
+ /// into non-widened memory.
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widened value
+ void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+
+ /// Helper function to generate a set of truncating stores to store a widened
+ /// vector into non-widened memory.
+ /// StChain: list of chains for the stores we have generated
+ /// ST: store of a widened value
+ void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST);
+
+ /// Modifies a vector input (widens or narrows it) to a vector of NVT. The
+ /// input vector must have the same element type as NVT.
+ /// When FillWithZeroes is "on" the vector will be widened with zeroes.
+ /// By default, the vector will be widened with undefined values.
+ SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Splitting: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which rely only on the fact that the illegal type is
+ // split into two not necessarily identical types. As such, they can be used
+ // for splitting vectors as well as for expanding integers and floats.
+
+ void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isVector())
+ GetSplitVector(Op, Lo, Hi);
+ else if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+ /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+ /// given value.
+ void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+ // Generic Result Splitting.
+ void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ //===--------------------------------------------------------------------===//
+ // Generic Expansion: LegalizeTypesGeneric.cpp
+ //===--------------------------------------------------------------------===//
+
+ // Legalization methods which rely only on the fact that the illegal type is
+ // split into two identical types of half the size, and that the Lo/Hi part
+ // is stored first in memory on little/big-endian machines, followed by the
+ // Hi/Lo part. As such, they can be used for expanding integers and floats.
+
+ void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+ if (Op.getValueType().isInteger())
+ GetExpandedInteger(Op, Lo, Hi);
+ else
+ GetExpandedFloat(Op, Lo, Hi);
+ }
+
+
+ /// This function will split the integer \p Op into \p NumElements
+ /// operations of type \p EltVT and store them in \p Ops.
+ void IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops, EVT EltVT);
+
+ // Generic Result Expansion.
+ void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+ // Generic Operand Expansion.
+ SDValue ExpandOp_BITCAST (SDNode *N);
+ SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
+ SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
+ SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
+ SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N);
+ SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo);
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
new file mode 100644
index 000000000000..665180e119b7
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -0,0 +1,555 @@
+//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements generic type expansion and splitting for LegalizeTypes.
+// The routines here perform legalization when the details of the type (such as
+// whether it is an integer or a float) do not matter.
+// Expansion is the act of changing a computation in an illegal type to be a
+// computation in two identical registers of a smaller type. The Lo/Hi part
+// is required to be stored first in memory on little/big-endian machines.
+// Splitting is the act of changing a computation in an illegal type to be a
+// computation in two not necessarily identical registers of a smaller type.
+// There are no requirements on how the type is represented in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Generic Result Expansion.
+//===----------------------------------------------------------------------===//
+
+// These routines assume that the Lo/Hi part is stored first in memory on
+// little/big-endian machines, followed by the Hi/Lo part. This means that
+// they cannot be used as is on vectors, for which Lo is always stored first.
+void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetExpandedOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ SDLoc dl(N);
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ break;
+ case TargetLowering::TypePromoteFloat:
+ llvm_unreachable("Bitcast of a promotion-needing float should never need "
+ "expansion");
+ case TargetLowering::TypeSoftenFloat: {
+ // Expand the floating point operand only if it was converted to integers.
+ // Otherwise, it is a legal type like f128 that can be saved in a register.
+ auto SoftenedOp = GetSoftenedFloat(InOp);
+ if (SoftenedOp == InOp)
+ break;
+ SplitInteger(SoftenedOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat: {
+ auto &DL = DAG.getDataLayout();
+ // Convert the expanded pieces of the input.
+ GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(InVT, DL) !=
+ TLI.hasBigEndianPartOrdering(OutVT, DL))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ case TargetLowering::TypeSplitVector:
+ GetSplitVector(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeScalarizeVector:
+ // Convert the element instead.
+ SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ case TargetLowering::TypeWidenVector: {
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+ InOp = GetWidenedVector(InOp);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+ std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+ return;
+ }
+ }
+
+ if (InVT.isVector() && OutVT.isInteger()) {
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+ // is legal but the result is not.
+ unsigned NumElems = 2;
+ EVT ElemVT = NOutVT;
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+ // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+ while (!isTypeLegal(NVT)) {
+ unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+ // If the element size is smaller than byte, bail.
+ if (NewSizeInBits < 8)
+ break;
+ NumElems *= 2;
+ ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+ NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+ }
+
+ if (isTypeLegal(NVT)) {
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+ SmallVector<SDValue, 8> Vals;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Vals.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+
+ // Build Lo, Hi pair by pairing extracted elements if needed.
+ unsigned Slot = 0;
+ for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+ // Each iteration will BUILD_PAIR two nodes and append the result until
+ // there are only two nodes left, i.e. Lo and Hi.
+ SDValue LHS = Vals[Slot];
+ SDValue RHS = Vals[Slot + 1];
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LHS, RHS);
+
+ Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(
+ *DAG.getContext(),
+ LHS.getValueType().getSizeInBits() << 1),
+ LHS, RHS));
+ }
+ Lo = Vals[Slot++];
+ Hi = Vals[Slot++];
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ return;
+ }
+ }
+
+ // Lower the bit-convert to a store/load from the stack.
+ assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+ // Create the stack frame object. Make sure it is aligned for both
+ // the source and expanded destination types.
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(
+ NOutVT.getTypeForEVT(*DAG.getContext()));
+ SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+ // Emit a store to the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);
+
+ // Load the first half from the stack slot.
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, dl,
+ StackPtr.getValueType()));
+
+ // Load the second half from the stack slot.
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize));
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+}
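+
+// The fallback above is expansion through memory: spill the operand once,
+// then read it back as two half-sized pieces at byte offsets 0 and Size/2,
+// swapping the pieces on big-endian targets. A standalone sketch for an f64
+// expanded into two i32's, kept out of the build (the helper name is ours
+// and an 8-byte double is assumed):
+#if 0
+#include <cstdint>
+#include <cstring>
+#include <utility>
+
+static void ExpandThroughMemory(double In, uint32_t &Lo, uint32_t &Hi,
+ bool BigEndian) {
+ unsigned char Slot[sizeof(double)]; // the stack temporary
+ std::memcpy(Slot, &In, sizeof(In)); // one store of the whole value
+ std::memcpy(&Lo, Slot, sizeof(Lo)); // load the first half
+ std::memcpy(&Hi, Slot + sizeof(Lo), sizeof(Hi)); // load the second half
+ if (BigEndian) // the high part was stored first
+ std::swap(Lo, Hi);
+}
+#endif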
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Return the operands.
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+ Hi : Lo;
+
+ assert(Part.getValueType() == N->getValueType(0) &&
+ "Type twice as big as expanded type not itself expanded!");
+
+ GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue OldVec = N->getOperand(0);
+ unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+ EVT OldEltVT = OldVec.getValueType().getVectorElementType();
+ SDLoc dl(N);
+
+ // Convert to a vector of the expanded element type, for example
+ // <3 x i64> -> <6 x i32>.
+ EVT OldVT = N->getValueType(0);
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+ if (OldVT != OldEltVT) {
+ // The result of EXTRACT_VECTOR_ELT may be larger than the element type of
+ // the input vector. If so, extend the elements of the input vector to the
+ // same bitwidth as the result before expanding.
+ assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!");
+ EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts);
+ OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0));
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, 2*OldElts),
+ OldVec);
+
+ // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(1, dl, Idx.getValueType()));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+}
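+
+// Worked example for the routine above (illustrative, assuming i64 expands to
+// two i32 parts): extracting element Idx from a v2i64 bitcasts the vector to
+// v4i32 and then takes
+//   Lo = extract_vector_elt v4i32, 2*Idx
+//   Hi = extract_vector_elt v4i32, 2*Idx + 1
+// with Lo and Hi swapped on big-endian targets.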
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+ SDLoc dl(N);
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT ValueVT = LD->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ unsigned Alignment = LD->getAlignment();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment,
+ LD->getMemOperand()->getFlags(), AAInfo);
+
+ // Increment the pointer to the other half.
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
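+
+// Worked example for the routine above (illustrative, assuming an i64 load
+// expanded into i32 parts): the single load becomes
+//   Lo = load i32 from Ptr
+//   Hi = load i32 from Ptr + 4   ; alignment MinAlign(Alignment, 4)
+// joined by a TokenFactor of the two load chains, with Lo and Hi swapped when
+// the target uses big-endian part ordering.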
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDLoc dl(N);
+ const unsigned Align = N->getConstantOperandVal(3);
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+
+ // Handle endianness of the load.
+ if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
+ SmallVectorImpl<SDValue> &Ops,
+ EVT EltVT) {
+ assert(Op.getValueType().isInteger());
+ SDLoc DL(Op);
+ SDValue Parts[2];
+
+ if (NumElements > 1) {
+ NumElements >>= 1;
+ SplitInteger(Op, Parts[0], Parts[1]);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Parts[0], Parts[1]);
+ IntegerToVector(Parts[0], NumElements, Ops, EltVT);
+ IntegerToVector(Parts[1], NumElements, Ops, EltVT);
+ } else {
+ Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op));
+ }
+}
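+
+// Worked example for IntegerToVector above (illustrative): called on an i64
+// with NumElements = 2 and EltVT = i32, SplitInteger produces the low and high
+// i32 halves, which are appended in memory order (low half first on
+// little-endian). With NumElements = 4 and EltVT = i16, each i32 half is split
+// once more, yielding four i16 pieces.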
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+ SDLoc dl(N);
+ if (N->getValueType(0).isVector()) {
+ // An illegal expanding type is being converted to a legal vector type.
+ // Make a two element vector out of the expanded parts and convert that
+ // instead, but only if the new vector type is legal (otherwise there
+ // is no point, and it might create expansion loops). For example, on
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+ //
+ // FIXME: I'm not sure why we are first trying to split the input into
+ // a 2 element vector, so I'm leaving it here to maintain the current
+ // behavior.
+ unsigned NumElts = 2;
+ EVT OVT = N->getOperand(0).getValueType();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+ TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+ NumElts);
+ if (!isTypeLegal(NVT)) {
+ // If we can't find a legal type by splitting the integer in half,
+ // then we can use the node's value type.
+ NumElts = N->getValueType(0).getVectorNumElements();
+ NVT = N->getValueType(0);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ makeArrayRef(Ops.data(), NumElts));
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+ }
+
+ // Otherwise, store to a temporary and load out again as the new type.
+ return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+ SDLoc dl(N);
+
+ assert(OldVT == VecVT.getVectorElementType() &&
+ "BUILD_VECTOR operand type doesn't match vector element type!");
+
+ // Build a vector of twice the length out of the expanded elements.
+ // For example <3 x i64> -> <6 x i32>.
+ std::vector<SDValue> NewElts;
+ NewElts.reserve(NumElts*2);
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(i), Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ NewElts.push_back(Lo);
+ NewElts.push_back(Hi);
+ }
+
+ SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ NewVT, NewElts.size()),
+ NewElts);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+ SDValue Lo, Hi;
+ GetExpandedOp(N->getOperand(0), Lo, Hi);
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+ // The vector type is legal but the element type needs expansion.
+ EVT VecVT = N->getValueType(0);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ SDValue Val = N->getOperand(1);
+ EVT OldEVT = Val.getValueType();
+ EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+ assert(OldEVT == VecVT.getVectorElementType() &&
+ "Inserted element type doesn't match vector element type!");
+
+ // Bitconvert to a vector of twice the length with elements of the expanded
+ // type, insert the expanded vector elements, and then convert back.
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+ NewVecVT, N->getOperand(0));
+
+ SDValue Lo, Hi;
+ GetExpandedOp(Val, Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ SDValue Idx = N->getOperand(2);
+ Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+ Idx = DAG.getNode(ISD::ADD, dl,
+ Idx.getValueType(), Idx,
+ DAG.getConstant(1, dl, Idx.getValueType()));
+ NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+ // Convert the new vector to the old vector type.
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+ "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ Ops[0] = N->getOperand(0);
+ SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+ for (unsigned i = 1; i < NumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+ assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+ assert(OpNo == 1 && "Can only expand the stored value so far");
+ SDLoc dl(N);
+
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ EVT ValueVT = St->getValue().getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
+ SDValue Chain = St->getChain();
+ SDValue Ptr = St->getBasePtr();
+ unsigned Alignment = St->getAlignment();
+ AAMDNodes AAInfo = St->getAAInfo();
+
+ assert(NVT.isByteSized() && "Expanded type not byte sized!");
+ unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+ SDValue Lo, Hi;
+ GetExpandedOp(St->getValue(), Lo, Hi);
+
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
+ St->getMemOperand()->getFlags(), AAInfo);
+
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
+ MinAlign(Alignment, IncrementSize),
+ St->getMemOperand()->getFlags(), AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
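+
+// Worked example for the routine above (illustrative, assuming an expanded i64
+// value stored on a 32-bit target): the store becomes
+//   store i32 Lo to Ptr
+//   store i32 Hi to Ptr + 4   ; alignment MinAlign(Alignment, 4)
+// with Lo and Hi swapped beforehand on targets with big-endian part ordering,
+// so the most-significant half lands at the lower address there. The two store
+// chains are joined with a TokenFactor.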
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
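+//
+// For instance, splitting a vector result always places Lo in the
+// lower-addressed half of the original value's storage, whereas an expanded
+// integer places Lo at the lower address only on little-endian targets.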
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ GetSplitOp(Op, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH, CL, CH;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(1), LL, LH);
+ GetSplitOp(N->getOperand(2), RL, RH);
+
+ SDValue Cond = N->getOperand(0);
+ CL = CH = Cond;
+ if (Cond.getValueType().isVector()) {
+ // Check if there are already split versions of the vector available and
+ // use those instead of splitting the mask operand again.
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Cond, CL, CH);
+ else
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ }
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+}
+
+void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LL, LH, RL, RH;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(2), LL, LH);
+ GetSplitOp(N->getOperand(3), RL, RH);
+
+ Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0),
+ N->getOperand(1), LL, RL, N->getOperand(4));
+ Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0),
+ N->getOperand(1), LH, RH, N->getOperand(4));
+}
+
+void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getUNDEF(LoVT);
+ Hi = DAG.getUNDEF(HiVT);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
new file mode 100644
index 000000000000..3c9cb17b58b2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -0,0 +1,1068 @@
+//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeVectors method.
+//
+// The vector legalizer looks for vector operations which might need to be
+// scalarized and legalizes them. This is a separate step from Legalize because
+// scalarizing can introduce illegal types. For example, suppose we have an
+// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
+// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
+// operation, which introduces nodes with the illegal type i64 which must be
+// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
+// the operation must be unrolled, which introduces nodes with the illegal
+// type i8 which must be promoted.
+//
+// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
+// or operations that happen to take a vector which are custom-lowered;
+// the legalization for such operations never produces nodes
+// with illegal types, so it's okay to put off legalizing them until
+// SelectionDAG::Legalize runs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+class VectorLegalizer {
+ SelectionDAG& DAG;
+ const TargetLowering &TLI;
+ bool Changed; // Keep track of whether anything changed
+
+ /// For nodes that are of legal width, and that have more than one use, this
+ /// map indicates what regularized operand to use. This allows us to avoid
+ /// legalizing the same thing more than once.
+ SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
+
+ /// \brief Adds a node to the translation cache.
+ void AddLegalizedOperand(SDValue From, SDValue To) {
+ LegalizedNodes.insert(std::make_pair(From, To));
+ // If someone requests legalization of the new node, return itself.
+ if (From != To)
+ LegalizedNodes.insert(std::make_pair(To, To));
+ }
+
+ /// \brief Legalizes the given node.
+ SDValue LegalizeOp(SDValue Op);
+
+ /// \brief Assuming the node is legal, "legalize" the results.
+ SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+
+ /// \brief Implements unrolling a VSETCC.
+ SDValue UnrollVSETCC(SDValue Op);
+
+ /// \brief Implement expand-based legalization of vector operations.
+ ///
+ /// This is just a high-level routine to dispatch to specific code paths for
+ /// operations to legalize them.
+ SDValue Expand(SDValue Op);
+
+ /// \brief Implements expansion for UINT_TO_FLOAT; falls back to
+ /// UnrollVectorOp if SINT_TO_FP and SRL on vectors aren't legal.
+ ///
+ /// The FNEG expansion, which falls back to UnrollVectorOp if FSUB isn't
+ /// legal, is handled by ExpandFNEG below.
+ SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+
+ /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+ SDValue ExpandSEXTINREG(SDValue Op);
+
+ /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+ /// type. The contents of the bits in the extended part of each element are
+ /// undef.
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+ /// type, then shifts left and arithmetic shifts right to introduce a sign
+ /// extension.
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and blends zeros into
+ /// the remaining lanes, finally bitcasting to the proper type.
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Expand bswap of vectors into a shuffle if legal.
+ SDValue ExpandBSWAP(SDValue Op);
+
+ /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
+ /// supported by the target.
+ SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
+ SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandBITREVERSE(SDValue Op);
+ SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
+
+ /// \brief Implements vector promotion.
+ ///
+ /// This is essentially just bitcasting the operands to a different type and
+ /// bitcasting the result back to the original type.
+ SDValue Promote(SDValue Op);
+
+ /// \brief Implements [SU]INT_TO_FP vector promotion.
+ ///
+ /// This is a [zs]ext of the input operand to the next size up.
+ SDValue PromoteINT_TO_FP(SDValue Op);
+
+ /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
+ ///
+ /// It is promoted to the next size up integer type. The result is then
+ /// truncated back to the original type.
+ SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
+
+public:
+ /// \brief Begin legalizing the vector operations in the DAG.
+ bool Run();
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+ // Before we start legalizing vector nodes, check if there are any vectors.
+ bool HasVectors = false;
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
+ // Check if the values of the nodes contain vectors. We don't need to check
+ // the operands because we are going to check their values at some point.
+ for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
+ J != E; ++J)
+ HasVectors |= J->isVector();
+
+ // If we found a vector node we can start the legalization.
+ if (HasVectors)
+ break;
+ }
+
+ // If this basic block has no vectors then no need to legalize vectors.
+ if (!HasVectors)
+ return false;
+
+ // The legalize process is inherently a bottom-up recursive process (users
+ // legalize their uses before themselves). Given infinite stack space, we
+ // could just start legalizing on the root and traverse the whole graph. In
+ // practice however, this causes us to run out of stack space on large basic
+ // blocks. To avoid this problem, compute an ordering of the nodes where each
+ // node is only legalized after all of its operands are legalized.
+ DAG.AssignTopologicalOrder();
+ for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+ E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
+ LegalizeOp(SDValue(&*I, 0));
+
+ // Finally, it's possible the root changed. Get the new root.
+ SDValue OldRoot = DAG.getRoot();
+ assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+ DAG.setRoot(LegalizedNodes[OldRoot]);
+
+ LegalizedNodes.clear();
+
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
+
+ return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+ // Generic legalization: just pass the operand through.
+ for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+ return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+ if (I != LegalizedNodes.end()) return I->second;
+
+ SDNode* Node = Op.getNode();
+
+ // Legalize the operands
+ SmallVector<SDValue, 8> Ops;
+ for (const SDValue &Op : Node->op_values())
+ Ops.push_back(LegalizeOp(Op));
+
+ SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
+
+ bool HasVectorValue = false;
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+ switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
+ LD->getMemoryVT())) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
+ if (Lowered == Result)
+ return TranslateLegalizeResults(Op, Lowered);
+ Changed = true;
+ if (Lowered->getNumValues() != Op->getNumValues()) {
+ // This expanded to something other than the load. Assume the
+ // lowering code took care of any chain values, and just handle the
+ // returned value.
+ assert(Result.getValue(1).use_empty() &&
+ "There are still live users of the old chain!");
+ return LegalizeOp(Lowered);
+ }
+ return TranslateLegalizeResults(Op, Lowered);
+ }
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ MVT ValVT = ST->getValue().getSimpleValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom: {
+ SDValue Lowered = TLI.LowerOperation(Result, DAG);
+ Changed = Lowered != Result;
+ return TranslateLegalizeResults(Op, Lowered);
+ }
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
+ HasVectorValue = true;
+
+ for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+ J != E;
+ ++J)
+ HasVectorValue |= J->isVector();
+ if (!HasVectorValue)
+ return TranslateLegalizeResults(Op, Result);
+
+ EVT QueryType;
+ switch (Op.getOpcode()) {
+ default:
+ return TranslateLegalizeResults(Op, Result);
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::BSWAP:
+ case ISD::BITREVERSE:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ case ISD::SELECT_CC:
+ case ISD::SETCC:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::FCOPYSIGN:
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FFLOOR:
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ case ISD::FMA:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ QueryType = Node->getValueType(0);
+ break;
+ case ISD::FP_ROUND_INREG:
+ QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ QueryType = Node->getOperand(0).getValueType();
+ break;
+ case ISD::MSCATTER:
+ QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
+ break;
+ case ISD::MSTORE:
+ QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+ break;
+ }
+
+ switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Promote:
+ Result = Promote(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal:
+ break;
+ case TargetLowering::Custom: {
+ if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
+ Result = Tmp1;
+ break;
+ }
+ // FALL THROUGH
+ }
+ case TargetLowering::Expand:
+ Result = Expand(Op);
+ }
+
+ // Make sure that the generated code is itself legal.
+ if (Result != Op) {
+ Result = LegalizeOp(Result);
+ Changed = true;
+ }
+
+ // Note that LegalizeOp may be reentered even from single-use nodes, which
+ // means that we always must cache transformed nodes.
+ AddLegalizedOperand(Op, Result);
+ return Result;
+}
+
+SDValue VectorLegalizer::Promote(SDValue Op) {
+ // For a few operations there is a specific concept for promotion based on
+ // the operand's type.
+ switch (Op.getOpcode()) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ return PromoteINT_TO_FP(Op);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ // Promote the operation by extending the operand.
+ return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+ }
+
+ // There are currently two cases of vector promotion:
+ // 1) Bitcasting a vector of integers to a different vector type with the
+ //    same overall bit width. For example, x86 promotes ISD::AND on v2i32
+ //    to v1i64.
+ // 2) Extending a vector of floats to a vector of the same number of larger
+ // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
+ MVT VT = Op.getSimpleValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ SDLoc dl(Op);
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ if (Op.getOperand(j)
+ .getValueType()
+ .getVectorElementType()
+ .isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
+ if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
+ (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
+ NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
+ else
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
+ // INT_TO_FP operations may require the input operand be promoted even
+ // when the type is otherwise legal.
+ EVT VT = Op.getOperand(0).getValueType();
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+
+ // Normal getTypeToPromoteTo() doesn't work here, as that will promote
+ // by widening the vector w/ the same element width and twice the number
+ // of elements. We want the other way around, the same number of elements,
+ // each twice the width.
+ //
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+
+ EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
+ SDLoc dl(Op);
+ SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+ unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
+ ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (Op.getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ else
+ Operands[j] = Op.getOperand(j);
+ }
+
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
+}
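+
+// Worked example for PromoteINT_TO_FP above (illustrative): for a UINT_TO_FP
+// whose v4i16 operand needs promotion, the operand is zero-extended to v4i32
+// and the UINT_TO_FP is re-emitted on the v4i32 operand; the result type is
+// left unchanged. A SINT_TO_FP would use SIGN_EXTEND instead.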
+
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result. This is different from the default
+// PromoteVector, which uses bitcast to promote, thus assuming that the
+// promoted vector type has the same overall size.
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "Can't promote a vector with multiple results!");
+ EVT VT = Op.getValueType();
+
+ EVT NewVT;
+ unsigned NewOpc;
+ while (1) {
+ NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
+ if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
+ NewOpc = ISD::FP_TO_SINT;
+ break;
+ }
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
+ NewOpc = ISD::FP_TO_UINT;
+ break;
+ }
+ }
+
+ SDLoc loc(Op);
+ SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+}
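+
+// Worked example for PromoteFP_TO_INT above (illustrative): an FP_TO_SINT
+// producing v4i8, on a target that only supports FP_TO_SINT to v4i16, is
+// emitted as FP_TO_SINT v4f32 -> v4i16 followed by a TRUNCATE back to v4i8.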
+
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+
+ EVT SrcVT = LD->getMemoryVT();
+ EVT SrcEltVT = SrcVT.getScalarType();
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+
+ SDValue NewChain;
+ SDValue Value;
+ if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ // When the elements in a vector are not byte-sized, we cannot load each
+ // element directly by advancing the pointer, which can only address whole
+ // bytes. Instead, we load all of the significant words, mask the relevant
+ // bits off, and concatenate them to form each element. Finally, the elements
+ // are extended to the destination scalar type to build the destination
+ // vector.
+ EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
+
+ assert(WideVT.isRound() &&
+ "Could not handle the sophisticated case when the widest integer is"
+ " not power of 2.");
+ assert(WideVT.bitsGE(SrcEltVT) &&
+ "Type is not legalized?");
+
+ unsigned WideBytes = WideVT.getStoreSize();
+ unsigned Offset = 0;
+ unsigned RemainingBytes = SrcVT.getStoreSize();
+ SmallVector<SDValue, 8> LoadVals;
+
+ while (RemainingBytes > 0) {
+ SDValue ScalarLoad;
+ unsigned LoadBytes = WideBytes;
+
+ if (RemainingBytes >= LoadBytes) {
+ ScalarLoad =
+ DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ } else {
+ EVT LoadVT = WideVT;
+ while (RemainingBytes < LoadBytes) {
+ LoadBytes >>= 1; // Reduce the load size by half.
+ LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
+ }
+ ScalarLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset), LoadVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ }
+
+ RemainingBytes -= LoadBytes;
+ Offset += LoadBytes;
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getConstant(LoadBytes, dl,
+ BasePTR.getValueType()));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
+
+ unsigned BitOffset = 0;
+ unsigned WideIdx = 0;
+ unsigned WideBits = WideVT.getSizeInBits();
+
+ for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
+ SDValue Lo, Hi, ShAmt;
+
+ if (BitOffset < WideBits) {
+ ShAmt = DAG.getConstant(
+ BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
+ }
+
+ BitOffset += SrcEltBits;
+ if (BitOffset >= WideBits) {
+ WideIdx++;
+ BitOffset -= WideBits;
+ if (BitOffset > 0) {
+ ShAmt = DAG.getConstant(
+ SrcEltBits - BitOffset, dl,
+ TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ }
+ }
+
+ if (Hi.getNode())
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+
+ switch (ExtType) {
+ default: llvm_unreachable("Unknown extended-load op!");
+ case ISD::EXTLOAD:
+ Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::ZEXTLOAD:
+ Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ case ISD::SEXTLOAD:
+ ShAmt =
+ DAG.getConstant(WideBits - SrcEltBits, dl,
+ TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
+ Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
+ break;
+ }
+ Vals.push_back(Lo);
+ }
+
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), Vals);
+ } else {
+ SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
+
+ NewChain = Scalarized.getValue(1);
+ Value = Scalarized.getValue(0);
+ }
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
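+
+// Worked example for the non-byte-sized path above (illustrative, assuming a
+// zextload of v2i4): one byte is loaded into the wide integer type, and each
+// element is recovered roughly as
+//   Elt[i] = (LoadedWord >> (4 * i)) & 0xF
+// before being zero-extended (or any-/sign-extended, depending on the
+// extension type) to the destination element type.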
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+
+ EVT StVT = ST->getMemoryVT();
+ EVT MemSclVT = StVT.getScalarType();
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize)) {
+ // FIXME: This is completely broken and inconsistent with ExpandLoad
+ // handling.
+
+ // For sub-byte element sizes, this ends up with a stride of 0 between
+ // elements, so the same element just gets rewritten to the same location.
+ // There seem to be tests that explicitly rely on this broken behavior.
+
+ LLVMContext &Ctx = *DAG.getContext();
+
+ EVT NewMemVT
+ = EVT::getVectorVT(Ctx,
+ MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
+ StVT.getVectorNumElements());
+
+ SDValue NewVectorStore = DAG.getTruncStore(
+ ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
+ ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ ST = cast<StoreSDNode>(NewVectorStore.getNode());
+ }
+
+ SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
+
+SDValue VectorLegalizer::Expand(SDValue Op) {
+ switch (Op->getOpcode()) {
+ case ISD::SIGN_EXTEND_INREG:
+ return ExpandSEXTINREG(Op);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return ExpandANY_EXTEND_VECTOR_INREG(Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+ case ISD::BSWAP:
+ return ExpandBSWAP(Op);
+ case ISD::VSELECT:
+ return ExpandVSELECT(Op);
+ case ISD::SELECT:
+ return ExpandSELECT(Op);
+ case ISD::UINT_TO_FP:
+ return ExpandUINT_TO_FLOAT(Op);
+ case ISD::FNEG:
+ return ExpandFNEG(Op);
+ case ISD::SETCC:
+ return UnrollVSETCC(Op);
+ case ISD::BITREVERSE:
+ return ExpandBITREVERSE(Op);
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
+ default:
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+}
+
+SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+ // Lower a select instruction where the condition is a scalar and the
+ // operands are vectors. Lower this select to VSELECT and implement it
+ // using XOR, AND and OR. The selector bit is broadcast into every lane.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ assert(VT.isVector() && !Mask.getValueType().isVector()
+ && Op1.getValueType() == Op2.getValueType() && "Invalid type");
+
+ unsigned NumElem = VT.getVectorNumElements();
+
+ // If we can't even use the basic vector operations of AND, OR and XOR, we
+ // will have to scalarize the op.
+ // Notice that the operation may be 'promoted', which means that it is
+ // 'bitcasted' to another type which is handled.
+ // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Generate a mask operand.
+ EVT MaskTy = VT.changeVectorElementTypeToInteger();
+
+ // What is the size of each element in the vector mask.
+ EVT BitTy = MaskTy.getScalarType();
+
+ Mask = DAG.getSelect(DL, BitTy, Mask,
+ DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
+ BitTy),
+ DAG.getConstant(0, DL, BitTy));
+
+ // Broadcast the mask so that the entire vector is all-ones or all-zeros.
+ SmallVector<SDValue, 8> Ops(NumElem, Mask);
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops);
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
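+
+// The blend built above is the classic bit-select idiom, applied lane-wise:
+//   Result = (Op1 & Mask) | (Op2 & ~Mask)
+// where Mask is all-ones in every lane when the scalar condition is true and
+// all-zeros otherwise.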
+
+SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Make sure that the SRA and SHL instructions are available.
+ if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ SDLoc DL(Op);
+ EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+
+ unsigned BW = VT.getScalarType().getSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
+
+ Op = Op.getOperand(0);
+ Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
+}
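+
+// For example (illustrative): SIGN_EXTEND_INREG from i8 on v4i32 lanes uses a
+// per-lane shift pair
+//   tmp = shl x, 24
+//   res = sra tmp, 24   // the arithmetic shift replicates the sign bit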
+
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build a base mask of undef shuffles.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.resize(NumSrcElements, -1);
+
+ // Place the extended lanes into the correct locations.
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
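+
+// For example (illustrative): any-extending v8i16 source lanes into a v4i32
+// result on a little-endian target uses the shuffle mask
+//   <0, -1, 1, -1, 2, -1, 3, -1>
+// so each source lane lands in the low half of its i32 lane and the rest is
+// undef; on a big-endian target the offsets shift by one, giving
+//   <-1, 0, -1, 1, -1, 2, -1, 3>.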
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // First build an any-extend node which can be legalized above when we
+ // recurse through it.
+ Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+
+ // Now we need the sign extension. Do this by shifting the elements. Even if these
+ // aren't legal operations, they have a better chance of being legalized
+ // without full scalarization than the sign extension does.
+ unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+ SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
+ return DAG.getNode(ISD::SRA, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+ ShiftAmount);
+}
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build up a zero vector to blend into this one.
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
+
+ // Shuffle the incoming lanes into the correct position, and pull all other
+ // lanes from the zero vector.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.reserve(NumSrcElements);
+ for (int i = 0; i < NumSrcElements; ++i)
+ ShuffleMask.push_back(i);
+
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
+
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
+ for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
+ for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
+ ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}
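+
+// For example (illustrative): for VT = v2i32 this produces the byte shuffle
+// mask <3, 2, 1, 0, 7, 6, 5, 4>, i.e. the bytes of each 32-bit element are
+// reversed in place.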
+
+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Generate a byte wise shuffle mask for the BSWAP.
+ SmallVector<int, 16> ShuffleMask;
+ createBSWAPShuffleMask(VT, ShuffleMask);
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
+
+ // Only emit a shuffle if the mask is legal.
+ if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // If we have the scalar operation, it's probably cheaper to unroll it.
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If the vector element width is a whole number of bytes, test if it's legal
+ // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
+ // vector. This greatly reduces the number of bit shifts necessary.
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+ SmallVector<int, 16> BSWAPMask;
+ createBSWAPShuffleMask(VT, BSWAPMask);
+
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+ if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+ (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+ (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+ BSWAPMask);
+ Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ }
+
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Let LegalizeDAG handle this later.
+ return Op;
+}
+
+SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+ // Implement VSELECT in terms of XOR, AND, OR
+ // on platforms which do not support blend natively.
+ SDLoc DL(Op);
+
+ SDValue Mask = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+
+ EVT VT = Mask.getValueType();
+
+ // If we can't even use the basic vector operations of AND, OR and XOR, we
+ // will have to scalarize the op.
+ // Notice that the operation may be 'promoted', which means that it is
+ // 'bitcasted' to another type which is handled.
+ // This operation also isn't safe with AND, OR and XOR when the boolean type
+ // is 0/1, as we need an all-ones vector constant to mask with.
+ // FIXME: Sign extend 1 to all ones if that's legal on the target.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(Op1.getValueType()) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // If the mask and the type are different sizes, unroll the vector op. This
+ // can occur when getSetCCResultType returns something that is different in
+ // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
+ if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits())
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ // Bitcast the operands to be the same type as the mask.
+ // This is needed when we select between FP types because
+ // the mask is a vector of integers.
+ Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
+ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
+
+ SDValue AllOnes = DAG.getConstant(
+ APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT);
+ SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+
+ Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
+ Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
+ SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+}
+
+SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDLoc DL(Op);
+
+ // Make sure that the SINT_TO_FP and SRL instructions are available.
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ EVT SVT = VT.getScalarType();
+ assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+
+ unsigned BW = SVT.getSizeInBits();
+ SDValue HalfWord = DAG.getConstant(BW/2, DL, VT);
+
+ // Constants to clear the upper part of the word.
+ // Notice that we can also use SHL+SHR, but using a constant is slightly
+ // faster on x86.
+ uint64_t HWMask = (SVT.getSizeInBits() == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
+ SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType());
+
+ // Clear upper part of LO, lower HI
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+
+ // Add the two halves
+ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+}
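+
+// Worked example for the expansion above (illustrative, for 32-bit elements):
+// HalfWord = 16, HalfWordMask = 0xFFFF and TWOHW = 65536.0, so per lane
+//   result = sitofp(x >> 16) * 65536.0 + sitofp(x & 0xFFFF)
+// mirroring the identity x = (x >> 16) * 2^16 + (x & 0xFFFF); both halves are
+// small enough to stay non-negative as signed integers.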
+
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+ SDLoc DL(Op);
+ SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+ // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
+ return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
+ Zero, Op.getOperand(0));
+ }
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
+ if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+ SDLoc DL(Op);
+ return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+ }
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ SDLoc dl(Op);
+ SmallVector<SDValue, 8> Ops(NumElems);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT),
+ LHSElem, RHSElem, CC);
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+ return VectorLegalizer(*this).Run();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 000000000000..f3adca49ccfe
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,3846 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue R = SDValue();
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to scalarize the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
+ case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
+ case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
+ case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
+ case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ R = ScalarizeVecRes_UnaryOp(N);
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FDIV:
+ case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+
+ case ISD::FPOW:
+ case ISD::FREM:
+ case ISD::FSUB:
+ case ISD::MUL:
+ case ISD::OR:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ R = ScalarizeVecRes_BinOp(N);
+ break;
+ case ISD::FMA:
+ R = ScalarizeVecRes_TernaryOp(N);
+ break;
+ }
+
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode())
+ SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ SDValue Op1 = GetScalarizedVector(N->getOperand(1));
+ SDValue Op2 = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ Op0.getValueType(), Op0, Op1, Op2);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+ unsigned ResNo) {
+ SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetScalarizedVector(Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
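+ // The <1 x ty> result is scalarized to ty and the bitcast source has the
+ // same bit width, so bitcast the source directly to the element type.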
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ // The BUILD_VECTOR operands may be of wider element types and
+ // we may need to truncate them back to the requested return type.
+ if (EltVT.isInteger())
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+ return DAG.getConvertRndSat(NewVT, SDLoc(N),
+ Op0, DAG.getValueType(NewVT),
+ DAG.getValueType(Op0.getValueType()),
+ N->getOperand(3),
+ N->getOperand(4),
+ cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
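+ // Extracting a one-element subvector is the same as extracting the single
+ // element at the subvector's start index.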
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ N->getValueType(0).getVectorElementType(),
+ N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ NewVT, Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+ SDValue Op = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::FPOWI, SDLoc(N),
+ Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ // The value to insert may have a wider type than the vector element type,
+ // so be sure to truncate it to the element type if necessary.
+ SDValue Op = N->getOperand(1);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (Op.getValueType() != EltVT)
+ // FIXME: Can this happen for floating point types?
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
+ return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+ assert(N->isUnindexed() && "Indexed vector load?");
+
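+ // Load just the single element, using the scalar element value and memory
+ // types and preserving the original alignment, flags and AA info.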
+ SDValue Result = DAG.getLoad(
+ ISD::UNINDEXED, N->getExtensionType(),
+ N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
+ N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
+ N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
+ N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+ // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+ EVT DestVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ // This is a workaround for targets where it's impossible to scalarize the
+ // result of a conversion, because the source type is legal.
+ // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+ // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
+ // legal and was not scalarized.
+ // See the similar logic in ScalarizeVecRes_VSETCC
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
+ LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ // If the operand is wider than the vector element type then it is implicitly
+ // truncated. Make that explicit here.
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue InOp = N->getOperand(0);
+ if (InOp.getValueType() != EltVT)
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
+ return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool =
+ TLI.getBooleanContents(false, false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+ // If integer and float booleans have different contents then we can't
+ // reliably optimize in all cases. There is a full explanation for this in
+ // DAGCombiner::visitSELECT() where the same issue affects folding
+ // (select C, 0, 1) to (xor C, 1).
+ if (TLI.getBooleanContents(false, false) !=
+ TLI.getBooleanContents(false, true)) {
+ // At least try the common case where the boolean is generated by a
+ // comparison.
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT OpVT = Cond->getOperand(0)->getValueType(0);
+ ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+ VecBool = TLI.getBooleanContents(OpVT);
+ } else
+ ScalarBool = TargetLowering::UndefinedBooleanContent;
+ }
+
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+ // The vector reads from all ones, but the scalar expects a single 1, so mask.
+ Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
+ Cond, DAG.getConstant(1, SDLoc(N), CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+ // The vector reads from a one, but the scalar expects all ones, so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ return DAG.getSelect(SDLoc(N),
+ LHS.getValueType(), N->getOperand(0), LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+ SDValue LHS = GetScalarizedVector(N->getOperand(2));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
+ N->getOperand(0), N->getOperand(1),
+ LHS, GetScalarizedVector(N->getOperand(3)),
+ N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+
+ if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
+
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ SDLoc DL(N);
+
+ // Turn it into a scalar SETCC.
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+ // Figure out if the scalar is the LHS or RHS and return it.
+ SDValue Arg = N->getOperand(2).getOperand(0);
+ if (Arg.isUndef())
+ return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
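+ // A zero mask element selects the (scalarized) LHS, any other value the RHS.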
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT OpVT = LHS.getValueType();
+ EVT NVT = N->getValueType(0).getVectorElementType();
+ SDLoc DL(N);
+
+ // The result needs scalarizing, but it's not a given that the source does.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ LHS = GetScalarizedVector(LHS);
+ RHS = GetScalarizedVector(RHS);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ LHS = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ RHS = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Turn it into a scalar SETCC.
+ SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
+ N->getOperand(2));
+ // Vectors may have different boolean contents than scalars. Promote the
+ // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, NVT, Res);
+}
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ if (!Res.getNode()) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Res = ScalarizeVecOp_UnaryOp(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::VSELECT:
+ Res = ScalarizeVecOp_VSELECT(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::FP_ROUND:
+ Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// If the value to convert is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, SDLoc(N),
+ N->getValueType(0), Elt);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0).getScalarType(), Elt);
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
+}
+
+/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+ SmallVector<SDValue, 8> Ops(N->getNumOperands());
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+ Ops[i] = GetScalarizedVector(N->getOperand(i));
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
+}
+
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
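+ // EXTRACT_VECTOR_ELT may be defined to return a type wider than the vector
+ // element, so any-extend the scalarized element if the types differ.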
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0),
+ Res);
+ return Res;
+}
+
+/// If the input condition is a vector that needs to be scalarized, it must be
+/// <1 x i1>, so just convert to a normal ISD::SELECT
+/// (still with vector output type since that was acceptable if we got here).
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
+ SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
+ N->getOperand(2));
+}
+
+/// If the value to store is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of one-element vector?");
+ assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+ SDLoc dl(N);
+
+ if (N->isTruncatingStore())
+ return DAG.getTruncStore(
+ N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getMemoryVT().getVectorElementType(), N->getAlignment(),
+ N->getMemOperand()->getFlags(), N->getAAInfo());
+
+ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+ N->getBasePtr(), N->getPointerInfo(),
+ N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+ N->getAAInfo());
+}
+
+/// If the value to round is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt,
+ N->getOperand(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// This method is called when the specified result of the specified node is
+/// found to need vector splitting. At this point, the node may also have
+/// invalid operands or may have other results that need legalization; we just
+/// know that (at least) one result needs vector splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Split node result: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split the result of this "
+ "operator!\n");
+
+ case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
+ case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
+ case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+ case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+ case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::LOAD:
+ SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::MLOAD:
+ SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
+ break;
+ case ISD::MGATHER:
+ SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
+ break;
+ case ISD::SETCC:
+ SplitVecRes_SETCC(N, Lo, Hi);
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+ break;
+
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CONVERT_RNDSAT:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ SplitVecRes_ExtendOp(N, Lo, Hi);
+ break;
+
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::FDIV:
+ case ISD::FPOW:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::FREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ SplitVecRes_BinOp(N, Lo, Hi);
+ break;
+ case ISD::FMA:
+ SplitVecRes_TernaryOp(N, Lo, Hi);
+ break;
+ }
+
+ // If Lo/Hi is null, the sub-method took care of registering results etc.
+ if (Lo.getNode())
+ SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
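+ // Split both operands and apply the operation to each half, propagating the
+ // node's flags.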
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDValue RHSLo, RHSHi;
+ GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+ SDLoc dl(N);
+
+ const SDNodeFlags *Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+}
+
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op0Lo, Op0Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ SDValue Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ SDValue Op2Lo, Op2Hi;
+ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
+ SDLoc dl(N);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(),
+ Op0Lo, Op1Lo, Op2Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
+ Op0Hi, Op1Hi, Op2Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned LoNumElts = LoVT.getVectorNumElements();
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps);
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps);
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+ SDLoc dl(N);
+ unsigned NumSubvectors = N->getNumOperands() / 2;
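+ // With exactly two operands, the two halves are simply the operands.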
+ if (NumSubvectors == 1) {
+ Lo = N->getOperand(0);
+ Hi = N->getOperand(1);
+ return;
+ }
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
+
+ SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
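+ // The high half starts LoVT.getVectorNumElements() elements past the
+ // original index.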
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ EVT VecVT = Vec.getValueType();
+ EVT VecElemVT = VecVT.getVectorElementType();
+ unsigned VecElems = VecVT.getVectorNumElements();
+ unsigned SubElems = SubVec.getValueType().getVectorNumElements();
+
+ // If we know the index is 0, and we know the subvector doesn't cross the
+ // boundary between the halves, we can avoid spilling the vector, and insert
+ // into the lower half of the split vector directly.
+ // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
+ // the index is constant and there is no boundary crossing. But those cases
+ // don't seem to get hit in practice.
+ if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = ConstIdx->getZExtValue();
+ if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+ return;
+ }
+ }
+
+ // Spill the vector to the stack.
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+
+ // Store the new subvector into the specified index.
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());
+
+ // Load the Lo part from the stack slot.
+ Lo =
+ DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr =
+ DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc DL(N);
+
+ SDValue RHSLo, RHSHi;
+ SDValue RHS = N->getOperand(1);
+ EVT RHSVT = RHS.getValueType();
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
+
+ Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ SDLoc dl(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) =
+ DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ DAG.getValueType(LoVT));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+
+ SDLoc dl(N);
+ SDValue InLo, InHi;
+ GetSplitVector(N0, InLo, InHi);
+ EVT InLoVT = InLo.getValueType();
+ unsigned InNumElements = InLoVT.getVectorNumElements();
+
+ EVT OutLoVT, OutHiVT;
+ std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ unsigned OutNumElements = OutLoVT.getVectorNumElements();
+ assert((2 * OutNumElements) <= InNumElements &&
+ "Illegal extend vector in reg split");
+
+ // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
+ // input vector (i.e. we only use InLo):
+ // OutLo will extend the first OutNumElements from InLo.
+ // OutHi will extend the next OutNumElements from InLo.
+
+ // Shuffle the elements from InLo for OutHi into the bottom elements to
+ // create a 'fake' InHi.
+ SmallVector<int, 8> SplitHi(InNumElements, -1);
+ for (unsigned i = 0; i != OutNumElements; ++i)
+ SplitHi[i] = i + OutNumElements;
+ InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
+
+ Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
+ Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
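+ // If the index is a constant, insert directly into the half that contains it.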
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getConstant(IdxVal - LoNumElts, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return;
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return;
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
+ Store =
+ DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
+
+ // Load the Lo part from the stack slot.
+ Lo =
+ DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, dl,
+ StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ MinAlign(Alignment, IncrementSize));
+}
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
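+ // The scalar lands in element 0 of the low half; the high half is undef.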
+ Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+ Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+ EVT MemoryVT = LD->getMemoryVT();
+ unsigned Alignment = LD->getOriginalAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
+
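+ // Advance the pointer by the byte size of the low half and load the high
+ // half from the adjusted address.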
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+ LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
+ Alignment, MMOFlags, AAInfo);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MLD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Ch = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ SDValue Mask = MLD->getMask();
+ SDValue Src0 = MLD->getSrc0();
+ unsigned Alignment = MLD->getOriginalAlignment();
+ ISD::LoadExtType ExtType = MLD->getExtensionType();
+
+ // If the alignment is equal to the vector size, use half of it for the
+ // second half.
+ unsigned SecondHalfAlignment =
+ (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MLD->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0Lo, Src0Hi;
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
+ ExtType);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
+ ExtType);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MLD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
+ SDValue &Lo, SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MGT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
+
+ SDValue Ch = MGT->getChain();
+ SDValue Ptr = MGT->getBasePtr();
+ SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
+ SDValue Index = MGT->getIndex();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ // Split Mask operand
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MGT->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0Lo, Src0Hi;
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
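+ // The gather operands are the chain, the Src0 merge value, the mask, the
+ // base pointer and the index.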
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
+ MMO);
+
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
+ MMO);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MGT, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+
+ EVT LoVT, HiVT;
+ SDLoc DL(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the input.
+ SDValue LL, LH, RL, RH;
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Get the dest types - they may not match the input types, e.g. int_to_fp.
+ EVT LoVT, HiVT;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+
+ if (N->getOpcode() == ISD::FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) {
+ SDValue DTyOpLo = DAG.getValueType(LoVT);
+ SDValue DTyOpHi = DAG.getValueType(HiVT);
+ SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
+ SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+ Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
+ CvtCode);
+ Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
+ CvtCode);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ }
+}
+
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DestVT = N->getValueType(0);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+ // We can do better than a generic split operation if the extend is doing
+ // more than just doubling the width of the elements and the following are
+ // true:
+ // - The number of vector elements is even,
+ // - the source type is legal,
+ // - the type of a split source is illegal,
+ // - the type of an extended (by doubling element size) source is legal, and
+ // - the type of that extended source when split is legal.
+ //
+ // This won't necessarily completely legalize the operation, but it will
+ // more effectively move in the right direction and prevent falling down
+ // to scalarization in many cases due to the input vector being split too
+ // far.
+ unsigned NumElements = SrcVT.getVectorNumElements();
+ if ((NumElements & 1) == 0 &&
+ SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT NewSrcVT = EVT::getVectorVT(
+ Ctx, EVT::getIntegerVT(
+ Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+ NumElements);
+ EVT SplitSrcVT =
+ EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+ EVT SplitLoVT, SplitHiVT;
+ std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+ TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+ DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Get the low and high halves of the new, extended one step, vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+ }
+ // Fall back to the generic unary operator splitting otherwise.
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input],
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+ Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
+ }
+
+ Ops.clear();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// This method is called when the specified operand of the specified node is
+/// found to need vector splitting. At this point, all of the result types of
+/// the node are known to be legal, but other operands of the node may need
+/// legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom split this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ if (!Res.getNode()) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N);
+ break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSCATTER:
+ Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+ break;
+ case ISD::MGATHER:
+ Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0)))
+ Res = SplitVecOp_TruncateHelper(N);
+ else
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::FTRUNC:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+ // The only possibility for an illegal operand is the mask, since result type
+ // legalization would otherwise have handled this node already.
+ assert(OpNo == 0 && "Illegal operand must be mask");
+
+ SDValue Mask = N->getOperand(0);
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ EVT Src0VT = Src0.getValueType();
+ SDLoc DL(N);
+ assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
+
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ assert(Lo.getValueType() == Hi.getValueType() &&
+ "Lo and Hi have differing types");
+
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+ assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
+
+ SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
+ std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+ std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+ std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+
+ SDValue LoSelect =
+ DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+ SDValue HiSelect =
+ DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
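+ // Each half produces the legal result element type with half the element
+ // count; the halves are concatenated back together below.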
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on Alpha. Typically the vector will
+ // end up being split all the way down to individual components. Convert the
+ // split pieces into integers and reassemble.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
+ JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ // We know that the extracted result type is legal.
+ EVT SubVT = N->getValueType(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ if (IdxVal < LoElts) {
+ assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+ "Extracted subvector crosses vector split!");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+ } else {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+ DAG.getConstant(IdxVal - LoElts, dl,
+ Idx.getValueType()));
+ }
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts, SDLoc(N),
+ Idx.getValueType())), 0);
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return SDValue();
+
+ // Make the vector elements byte-addressable if they aren't already.
+ SDLoc dl(N);
+ EVT EltVT = VecVT.getVectorElementType();
+ if (EltVT.getSizeInBits() < 8) {
+ SmallVector<SDValue, 4> ElementOps;
+ for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
+ ElementOps.push_back(DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
+ DAG.getConstant(i, dl, MVT::i8)),
+ dl, MVT::i8));
+ }
+
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorNumElements());
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps);
+ }
+
+ // Store the vector to the stack.
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store =
+ DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
+ unsigned OpNo) {
+ EVT LoVT, HiVT;
+ SDLoc dl(MGT);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
+
+ SDValue Ch = MGT->getChain();
+ SDValue Ptr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue Mask = MGT->getMask();
+ SDValue Src0 = MGT->getValue();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+ EVT MemoryVT = MGT->getMemoryVT();
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue Src0Lo, Src0Hi;
+ if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Src0, Src0Lo, Src0Hi);
+ else
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+
+ SDValue IndexHi, IndexLo;
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo};
+ SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
+ OpsLo, MMO);
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(),
+ MGT->getRanges());
+
+ SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi};
+ SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
+ OpsHi, MMO);
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(MGT, 1), Ch);
+
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
+ Hi);
+ ReplaceValueWith(SDValue(MGT, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+
+ MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
+ MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
+
+ // If the alignment is equal to the vector size, use half of it for the
+ // second half.
+ unsigned SecondHalfAlignment =
+ (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
+ N->isTruncatingStore());
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
+ N->isTruncatingStore());
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
+ unsigned OpNo) {
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ SDValue Mask = N->getMask();
+ SDValue Index = N->getIndex();
+ SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ SDLoc DL(N);
+
+ // Split all operands
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Data operand
+ GetSplitVector(Data, DataLo, DataHi);
+ else
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ SDValue MaskLo, MaskHi;
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ // Split Mask operand
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+
+  SDValue IndexLo, IndexHi;
+ if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Index, IndexLo, IndexHi);
+ else
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+
+ SDValue Lo, Hi;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
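+  // Scatter operands are {chain, data, mask, base pointer, index}.  Both
+  // halves reuse the same base pointer; the per-element addresses come from
+  // the split index vectors.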
+ SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO);
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(N->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ Alignment, N->getAAInfo(), N->getRanges());
+
+ SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
+ Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
+
+ // Build a factor node to remember that this store is independent of the
+ // other one.
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed store of vector?");
+ assert(OpNo == 1 && "Can only split the stored value");
+ SDLoc DL(N);
+
+ bool isTruncating = N->isTruncatingStore();
+ SDValue Ch = N->getChain();
+ SDValue Ptr = N->getBasePtr();
+ EVT MemoryVT = N->getMemoryVT();
+ unsigned Alignment = N->getOriginalAlignment();
+ MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = N->getAAInfo();
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(1), Lo, Hi);
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
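+  // E.g. for a non-truncating v8i32 store, the low v4i32 half is stored at
+  // the original address and the high half at a 16-byte offset.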
+
+ if (isTruncating)
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
+ Alignment, MMOFlags, AAInfo);
+ else
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
+ AAInfo);
+
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+
+ if (isTruncating)
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, Alignment, MMOFlags, AAInfo);
+ else
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ Alignment, MMOFlags, AAInfo);
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ SDLoc DL(N);
+
+ // The input operands all must have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (const SDValue &Op : N->op_values()) {
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
+ // The result type is legal, but the input type is illegal. If splitting
+ // ends up with the result type of each half still being legal, just
+ // do that. If, however, that would result in an illegal result type,
+ // we can try to get more clever with power-two vectors. Specifically,
+ // split the input type, but also widen the result element size, then
+ // concatenate the halves and truncate again. For example, consider a target
+ // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+ // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+ // %inlo = v4i32 extract_subvector %in, 0
+ // %inhi = v4i32 extract_subvector %in, 4
+ // %lo16 = v4i16 trunc v4i32 %inlo
+ // %hi16 = v4i16 trunc v4i32 %inhi
+ // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+ // %res = v8i8 trunc v8i16 %in16
+ //
+ // Without this transform, the original truncate would end up being
+ // scalarized, which is pretty much always a last resort.
+ SDValue InVec = N->getOperand(0);
+ EVT InVT = InVec->getValueType(0);
+ EVT OutVT = N->getValueType(0);
+ unsigned NumElements = OutVT.getVectorNumElements();
+ bool IsFloat = OutVT.isFloatingPoint();
+
+ // Widening should have already made sure this is a power-two vector
+ // if we're trying to split it at all. assert() that's true, just in case.
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+
+ unsigned InElementSize = InVT.getVectorElementType().getSizeInBits();
+ unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits();
+
+ // If the input elements are only 1/2 the width of the result elements,
+  // just use the normal splitting. Our trick only works if there's room
+ // to split more than once.
+ if (InElementSize <= OutElementSize * 2)
+ return SplitVecOp_UnaryOp(N);
+ SDLoc DL(N);
+
+ // Extract the halves of the input via extract_subvector.
+ SDValue InLoVec, InHiVec;
+ std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
+ // Truncate them to 1/2 the element size.
+ EVT HalfElementVT = IsFloat ?
+ EVT::getFloatingPointVT(InElementSize/2) :
+ EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+ NumElements/2);
+ SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+ // Concatenate them to get the full intermediate truncation result.
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+ SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+ HalfHi);
+ // Now finish up by truncating all the way down to the original result
+ // type. This should normally be something that ends up being legal directly,
+ // but in theory if a target has very wide vectors and an annoyingly
+ // restricted set of legal types, this split can chain to build things up.
+ return IsFloat
+ ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
+ DAG.getTargetConstant(
+ 0, DL, TLI.getPointerTy(DAG.getDataLayout())))
+ : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operand types must be vectors");
+ // The result has a legal vector type, but the input needs splitting.
+ SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(0), Lo0, Hi0);
+ GetSplitVector(N->getOperand(1), Lo1, Hi1);
+ unsigned PartElements = Lo0.getValueType().getVectorNumElements();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
+ return PromoteTargetBoolean(Con, N->getValueType(0));
+}
+
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc DL(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and the first input) has a legal vector type, but the second
+ // input needs splitting.
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue();
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+ case ISD::MLOAD:
+ Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
+ break;
+ case ISD::MGATHER:
+ Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SUB:
+ case ISD::XOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case ISD::FMINNAN:
+ case ISD::FMAXNAN:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ Res = WidenVecRes_Binary(N);
+ break;
+
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = WidenVecRes_BinaryCanTrap(N);
+ break;
+
+ case ISD::FCOPYSIGN:
+ Res = WidenVecRes_FCOPYSIGN(N);
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N);
+ break;
+
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ Res = WidenVecRes_Unary(N);
+ break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+ // Ternary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = GetWidenedVector(N->getOperand(2));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+ // Binary op widening.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+ // Binary op widening for operations that can trap.
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ const SDNodeFlags *Flags = N->getFlags();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+  //   take chunks of size NumElts from the beginning and add them to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
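+  // For example, widening a v3f64 FDIV to v4f64 when only v2f64 is legal
+  // emits one v2f64 FDIV for elements 0-1 and a scalar FDIV for element 2;
+  // the pieces are reassembled into the widened vector below.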
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue EOp2 = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2, Flags);
+ }
+ CurNumElts = 0;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx],
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+  if (NumOps != ConcatEnd) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
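+  // E.g. for a v2i64-to-v2i32 truncate whose result widens to v4i32,
+  // InWidenVT is v4i64: the input element type at the widened element count.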
+
+ unsigned Opcode = N->getOpcode();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ const SDNodeFlags *Flags = N->getFlags();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(N->getOperand(0));
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
+ }
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+      // Extract the input and convert the shortened input vector.
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue InOp = N->getOperand(0);
+ SDLoc DL(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenSVT = WidenVT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ }
+ }
+ }
+
+ // Unroll, extend the scalars and rebuild the vector.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ switch (Opcode) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
+ break;
+ default:
+ llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
+ }
+ Ops.push_back(Val);
+ }
+
+ while (Ops.size() != WidenNumElts)
+ Ops.push_back(DAG.getUNDEF(WidenSVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
+ // If this is an FCOPYSIGN with same input types, we can treat it as a
+ // normal (can trap) binary op.
+ if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
+ return WidenVecRes_BinaryCanTrap(N);
+
+ // If the types are different, fall back to unrolling.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+
+ EVT ShVT = ShOp.getValueType();
+ if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
+ ShOp = GetWidenedVector(ShOp);
+ ShVT = ShOp.getValueType();
+ }
+ EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ ShVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ if (ShVT != ShWidenVT)
+ ShOp = ModifyToType(ShOp, ShWidenVT);
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+ // Unary op widening.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+ cast<VTSDNode>(N->getOperand(1))->getVT()
+ .getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+ SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
+ return GetWidenedVector(WidenVec);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) {
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypePromoteFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break;
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size.  Convert to the widened value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+    // Determine the new input vector type.  It has the same total size as the
+    // type to widen to and uses the input's element type (if the input is a
+    // vector) or the input type itself as its element type.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize;
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
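+    // E.g. bitcasting an i64 to a v2i32 that widens to v4i32: NewInVT is
+    // v2i64, the i64 becomes element 0 of a BUILD_VECTOR padded with undef,
+    // and that v2i64 is bitcast to v4i32.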
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+  // Build a widened vector, padding the new elements with undef values.
+ EVT VT = N->getValueType(0);
+
+ // Integer BUILD_VECTOR operands may be larger than the node's vector element
+ // type. The UNDEFs need to have the same type as the existing operands.
+ EVT EltVT = N->getOperand(0).getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+ assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
+ NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+ EVT InVT = N->getOperand(0).getValueType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumInElts = InVT.getVectorNumElements();
+ unsigned NumOperands = N->getNumOperands();
+
+ bool InputWidened = false; // Indicates we need to widen the input.
+ if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
+ if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+ // Add undef vectors to widen to correct length.
+ unsigned NumConcat = WidenVT.getVectorNumElements() /
+ InVT.getVectorNumElements();
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ for (unsigned i=0; i < NumOperands; ++i)
+ Ops[i] = N->getOperand(i);
+ for (unsigned i = NumOperands; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
+ }
+ } else {
+ InputWidened = true;
+ if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+      // The inputs and the result are widened to the same type.
+ unsigned i;
+ for (i=1; i < NumOperands; ++i)
+ if (!N->getOperand(i).isUndef())
+ break;
+
+ if (i == NumOperands)
+ // Everything but the first operand is an UNDEF so just return the
+ // widened first operand.
+ return GetWidenedVector(N->getOperand(0));
+
+ if (NumOperands == 2) {
+ // Replace concat of two operands with a shuffle.
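+        // The mask takes the NumInElts low lanes of each widened operand and
+        // leaves the remaining lanes undefined.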
+ SmallVector<int, 16> MaskOps(WidenNumElts, -1);
+ for (unsigned i = 0; i < NumInElts; ++i) {
+ MaskOps[i] = i;
+ MaskOps[i + NumInElts] = i + WidenNumElts;
+ }
+ return DAG.getVectorShuffle(WidenVT, dl,
+ GetWidenedVector(N->getOperand(0)),
+ GetWidenedVector(N->getOperand(1)),
+ MaskOps);
+ }
+ }
+ }
+
+ // Fall back to use extracts and build vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Idx = 0;
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (InputWidened)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+ SDLoc dl(N);
+ SDValue InOp = N->getOperand(0);
+ SDValue RndOp = N->getOperand(3);
+ SDValue SatOp = N->getOperand(4);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+ SDValue DTyOp = DAG.getValueType(WidenVT);
+ SDValue STyOp = DAG.getValueType(InWidenVT);
+ ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ InVTNumElts = InVT.getVectorNumElements();
+ if (InVTNumElts == WidenNumElts)
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (TLI.isTypeLegal(InWidenVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ if (WidenNumElts % InVTNumElts == 0) {
+ // Widen the input and call convert on the widened input vector.
+ unsigned NumConcat = WidenNumElts/InVTNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ Ops[0] = InOp;
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = UndefVal;
+
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops);
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shortened input vector.
+ InOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+ }
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = WidenVT.getVectorElementType();
+ DTyOp = DAG.getValueType(EltVT);
+ STyOp = DAG.getValueType(InEltVT);
+
+ unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+ unsigned i;
+ for (i=0; i < MinElts; ++i) {
+ SDValue ExtVal = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+ SatOp, CvtCode);
+ }
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ SDLoc dl(N);
+
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+
+ EVT InVT = InOp.getValueType();
+
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+ // We could try widening the input to the right length but for now, extract
+ // the original elements, fill the rest with undefs and build a vector.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i;
+ for (i=0; i < NumElts; ++i)
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(IdxVal + i, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i < WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
+ InOp.getValueType(), InOp,
+ N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+ SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue Src0 = GetWidenedVector(N->getSrc0());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ SDLoc dl(N);
+
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ EVT BoolVT = getSetCCResultType(WidenVT);
+
+    // We can't use ModifyToType() because the widened mask must be padded
+    // with zeroes, so that the extra lanes are never actually loaded.
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+
+ SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, Src0, N->getMemoryVT(),
+ N->getMemOperand(), ExtType);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
+
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue Src0 = GetWidenedVector(N->getValue());
+ unsigned NumElts = WideVT.getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen the Index operand
+ SDValue Index = N->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
+ WidenVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue Cond1 = N->getOperand(0);
+ EVT CondVT = Cond1.getValueType();
+ if (CondVT.isVector()) {
+ EVT CondEltVT = CondVT.getVectorElementType();
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
+ CondEltVT, WidenNumElts);
+ if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
+ Cond1 = GetWidenedVector(Cond1);
+
+ // If we have to split the condition there is no point in widening the
+    // select. This would result in a cycle of widening the select ->
+ // widening the condition operand -> splitting the condition operand ->
+ // splitting the select -> widening the select. Instead split this select
+ // further and widen the resulting type.
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+ SDValue Res = ModifyToType(SplitSelect, WidenVT);
+ return Res;
+ }
+
+ if (Cond1.getValueType() != CondWidenVT)
+ Cond1 = ModifyToType(Cond1, CondWidenVT);
+ }
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, Cond1, InOp1, InOp2);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(2));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(3));
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
+ InOp1.getValueType(), N->getOperand(0),
+ N->getOperand(1), InOp1, InOp2, N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() ==
+ N->getOperand(0).getValueType().isVector() &&
+ "Scalar/Vector type mismatch");
+ if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
+ InOp1, InOp2, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getUNDEF(WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+ // Adjust mask based on new input vector length.
+ SmallVector<int, 16> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = N->getMaskElt(i);
+ if (Idx < (int)NumElts)
+ NewMask.push_back(Idx);
+ else
+ NewMask.push_back(Idx - NumElts + WidenNumElts);
+ }
+ for (unsigned i = NumElts; i != WidenNumElts; ++i)
+ NewMask.push_back(-1);
+ return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+ SDValue InOp1 = N->getOperand(0);
+ EVT InVT = InOp1.getValueType();
+ assert(InVT.isVector() && "can not widen non-vector type");
+ EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), WidenNumElts);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+ // In this case, we also need to split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
+ InOp1 = GetWidenedVector(InOp1);
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately.  If not,
+ // we will have to unroll it at some point.
+ assert(InOp1.getValueType() == WidenInVT &&
+ InOp2.getValueType() == WidenInVT &&
+ "Input not widened to expected type!");
+ (void)WidenInVT;
+ return DAG.getNode(ISD::SETCC, SDLoc(N),
+ WidenVT, InOp1, InOp2, N->getOperand(2));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom widen this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen this operator's operand!");
+
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
+ case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecOp_EXTEND(N);
+ break;
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::TRUNCATE:
+ Res = WidenVecOp_Convert(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue InOp = N->getOperand(0);
+ // If some legalization strategy other than widening is used on the operand,
+ // we can't safely assume that just extending the low lanes is the correct
+ // transformation.
+ if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
+ return WidenVecOp_Convert(N);
+ InOp = GetWidenedVector(InOp);
+ assert(VT.getVectorNumElements() <
+ InOp.getValueType().getVectorNumElements() &&
+ "Input wasn't widened!");
+
+ // We may need to further widen the operand until it has the same total
+ // vector size as the result.
+ EVT InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+ EVT InEltVT = InVT.getVectorElementType();
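+    // Look for a legal vector type with the same element type and the same
+    // total width as the result; e.g. if the operand was widened to v8i8 but
+    // the result is v4i32, the search finds v16i8 and the v8i8 value is
+    // inserted into its low half.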
+    for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE;
+         i < e; ++i) {
+ EVT FixedVT = (MVT::SimpleValueType)i;
+ EVT FixedEltVT = FixedVT.getVectorElementType();
+ if (TLI.isTypeLegal(FixedVT) &&
+ FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+ FixedEltVT == InEltVT) {
+ assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+ "Not enough elements in the fixed type for the operand!");
+ assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+ "We can't have the same type as we started with!");
+ if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+ InOp = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ else
+ InOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ break;
+ }
+ }
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits())
+ // We couldn't find a legal vector type that was a widening of the input
+ // and could be extended in-register to the result type, so we have to
+ // scalarize.
+ return WidenVecOp_Convert(N);
+ }
+
+ // Use special DAG nodes to represent the operation of extending the
+ // low lanes.
+ switch (N->getOpcode()) {
+ default:
+    llvm_unreachable("Extend legalization on an extend operation!");
+ case ISD::ANY_EXTEND:
+ return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+ case ISD::SIGN_EXTEND:
+ return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+ case ISD::ZERO_EXTEND:
+ return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+ }
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+ // The result (and first input) is legal, but the second input is illegal.
+ // We can't do much to fix that, so just unroll and let the extracts off of
+ // the second input be widened as needed later.
+ return DAG.UnrollVectorOp(N);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+ // Since the result is legal and the input is illegal, it is unlikely that we
+ // can fix the input to a legal type so unroll the convert into some scalar
+ // code and create a nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InOp = N->getOperand(0);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+ SmallVector<SDValue, 16> Ops(NumElts);
+ for (unsigned i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(
+ Opcode, dl, EltVT,
+ DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ EVT InWidenVT = InOp.getValueType();
+ SDLoc dl(N);
+
+ // Check if we can convert between two legal vector types and extract.
+ unsigned InWidenSize = InWidenVT.getSizeInBits();
+ unsigned Size = VT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
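+  // E.g. for a v2i32 operand widened to v4i32 and bitcast to i64, the v4i32
+  // is bitcast to v2i64 and element 0 is extracted.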
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
+ unsigned NewNumElts = InWidenSize / Size;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, VT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+ // If the input vector is not legal, it is likely that we will not find a
+ // legal vector of the same size. Replace the concatenate vector with a
+ // nasty build vector.
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(NumElts);
+
+ EVT InVT = N->getOperand(0).getValueType();
+ unsigned NumInElts = InVT.getVectorNumElements();
+
+ unsigned Idx = 0;
+ unsigned NumOperands = N->getNumOperands();
+ for (unsigned i=0; i < NumOperands; ++i) {
+ SDValue InOp = N->getOperand(i);
+ if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ InOp = GetWidenedVector(InOp);
+ for (unsigned j=0; j < NumInElts; ++j)
+ Ops[Idx++] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
+ N->getValueType(0), InOp, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  // We have to widen the value, but we only want to store the original
+  // vector type.
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+
+ SmallVector<SDValue, 16> StChain;
+ if (ST->isTruncatingStore())
+ GenWidenVectorTruncStores(StChain, ST);
+ else
+ GenWidenVectorStores(StChain, ST);
+
+ if (StChain.size() == 1)
+ return StChain[0];
+ else
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ EVT MaskVT = Mask.getValueType();
+ SDValue StVal = MST->getValue();
+ // Widen the value
+ SDValue WideVal = GetWidenedVector(StVal);
+ SDLoc dl(N);
+
+ if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ Mask = GetWidenedVector(Mask);
+ else {
+ // The mask should be widened as well.
+ EVT BoolVT = getSetCCResultType(WideVal.getValueType());
+    // We can't use ModifyToType() because the widened mask must be padded
+    // with zeroes, so that the extra lanes are never actually stored.
+ unsigned WidenNumElts = BoolVT.getVectorNumElements();
+ unsigned MaskNumElts = MaskVT.getVectorNumElements();
+
+ unsigned NumConcat = WidenNumElts / MaskNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
+ Ops[0] = Mask;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = ZeroVal;
+
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
+ }
+ assert(Mask.getValueType().getVectorNumElements() ==
+ WideVal.getValueType().getVectorNumElements() &&
+ "Mask and data vectors should have the same number of elements");
+ return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
+ Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ false);
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can widen only data operand of mscatter");
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue DataOp = MSC->getValue();
+ SDValue Mask = MSC->getMask();
+
+ // Widen the value.
+ SDValue WideVal = GetWidenedVector(DataOp);
+ EVT WideVT = WideVal.getValueType();
+ unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+ SDLoc dl(N);
+
+ // The mask should be widened as well.
+ Mask = WidenTargetBoolean(Mask, WideVT, true);
+
+ // Widen index.
+ SDValue Index = MSC->getIndex();
+ EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
+ Index.getValueType().getScalarType(),
+ NumElts);
+ Index = ModifyToType(Index, WideIndexVT);
+
+ SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
+ MSC->getMemoryVT(), dl, Ops,
+ MSC->getMemOperand());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDLoc dl(N);
+
+ // WARNING: In this code we widen the compare instruction with garbage.
+ // This garbage may contain denormal floats which may be slow. Is this a
+ // real concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+ // Utility function to find the type to chop up a widened vector for
+ // load/store.
+ // TLI: Target lowering used to determine legal types.
+ // Width: Width left to load/store.
+ // WidenVT: The widened vector type to load to/store from.
+ // Align: If 0, don't allow use of a wider type.
+ // WidenEx: If Align is not 0, the additional amount we may load/store
+ // beyond Width.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is a larger legal integer type than the element type to
+ // load/store.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same
+ // vector element type and whose size evenly divides the widened vector's
+ // size.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ return RetVT;
+}
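
FindMemType is the piece that GenWidenVectorLoads and GenWidenVectorStores call repeatedly to peel off the largest chunk they can still access legally. A minimal standalone sketch of that greedy chopping, assuming a fixed list of candidate widths in place of the target's legal-type query (names and the candidate list are illustrative, not LLVM API):

    #include <cassert>
    #include <vector>

    // Illustrative sketch only: split WidthInBits into chunk sizes, always
    // taking the largest candidate that still fits, the way the widening
    // load/store loops repeatedly call FindMemType on the remaining width.
    std::vector<unsigned> chopWidth(unsigned WidthInBits) {
      assert(WidthInBits % 8 == 0 && "expect byte-sized widths");
      static const unsigned Candidates[] = {256, 128, 64, 32, 16, 8};
      std::vector<unsigned> Chunks;
      while (WidthInBits > 0) {
        for (unsigned C : Candidates) {
          if (C <= WidthInBits) {
            Chunks.push_back(C);
            WidthInBits -= C;
            break;
          }
        }
      }
      return Chunks; // e.g. a 96-bit vector becomes a 64-bit and a 32-bit piece
    }
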
+
+ // Builds a vector type from scalar loads.
+ // VecTy: Resulting vector type.
+ // LdOps: Load operations used to build the vector.
+ // [Start,End): the range of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVectorImpl<SDValue> &LdOps,
+ unsigned Start, unsigned End) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(LdOps[Start]);
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust position and vector position based on new load type.
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load power-of-two widths.
+ // The routine chops the vector into the largest vector loads with the same
+ // element type, or scalar loads, and then recombines the pieces into the
+ // widened vector type.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth;
+ unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
+
+ // Find the vector type that we can load from.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ Align, MMOFlags, AAInfo);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction.
+ if (LdWidth <= NewVTWidth) {
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+ }
+
+ // Load the vector using multiple loads, from the largest vector type down
+ // to scalars.
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Increment, dl, BasePtr.getValueType()));
+
+ SDValue L;
+ if (LdWidth < NewVTWidth) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Increment), MMOFlags, AAInfo);
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the load operations.
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the loads contain vectors, build the result with CONCAT_VECTORS.
+ // All of the vector loads are power-of-two sized, and the scalar loads can
+ // be combined to make a power-of-two vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First, combine the scalar loads to a vector.
+ if (!LdTy.isVector()) {
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector.
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
+
+ // We need to fill the rest with undefs to build the vector.
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
+}
+
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD,
+ ISD::LoadExtType ExtType) {
+ // For extension loads, it may not be more efficient to chop up the vector
+ // and then extend it. Instead, we unroll the load and build a new vector.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ EVT EltVT = WidenVT.getVectorElementType();
+ EVT LdEltVT = LdVT.getVectorElementType();
+ unsigned NumElts = LdVT.getVectorNumElements();
+
+ // Load each element and widen.
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ unsigned Increment = LdEltVT.getSizeInBits() / 8;
+ Ops[0] =
+ DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
+ LdEltVT, Align, MMOFlags, AAInfo);
+ LdChain.push_back(Ops[0].getValue(1));
+ unsigned i = 0, Offset = Increment;
+ for (i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr,
+ DAG.getConstant(Offset, dl,
+ BasePtr.getValueType()));
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ Align, MMOFlags, AAInfo);
+ LdChain.push_back(Ops[i].getValue(1));
+ }
+
+ // Fill the rest with undefs.
+ SDValue UndefVal = DAG.getUNDEF(EltVT);
+ for (; i != WidenNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+}
+
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store power-of-two widths.
+ // The routine chops the vector into the largest vector stores with the same
+ // element type or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDLoc dl(ST);
+
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with.
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getStore(
+ Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Offset), MMOFlags, AAInfo));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Increment, dl,
+ BasePtr.getValueType()));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store.
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type.
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ do {
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getConstant(Idx++, dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getStore(
+ Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(Align, Offset), MMOFlags, AAInfo));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Increment, dl,
+ BasePtr.getValueType()));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore index back to be relative to the original widen element type.
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST) {
+ // For truncating stores, it may not be more efficient to truncate the wide
+ // vector and then store it. Instead, we extract each element and then store
+ // it.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDLoc dl(ST);
+
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ValOp.getValueType();
+
+ // It must be the case that the widened vector type is bigger than the type
+ // we need to store.
+ assert(StVT.isVector() && ValOp.getValueType().isVector());
+ assert(StVT.bitsLT(ValOp.getValueType()));
+
+ // For truncating stores, we cannot play the trick of chopping the vector
+ // into legal types and bitcasting to the right type. Instead, we unroll
+ // the store.
+ EVT StEltVT = StVT.getVectorElementType();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned Increment = ValEltVT.getSizeInBits() / 8;
+ unsigned NumElts = StVT.getVectorNumElements();
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT, Align,
+ MMOFlags, AAInfo));
+ unsigned Offset = Increment;
+ for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+ SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+ BasePtr,
+ DAG.getConstant(Offset, dl,
+ BasePtr.getValueType()));
+ SDValue EOp = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ StChain.push_back(DAG.getTruncStore(
+ Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo));
+ }
+}
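
Both GenWidenVectorExtLoads and GenWidenVectorTruncStores unroll to one memory access per element, stepping the base pointer by the in-memory element size each iteration. The offset arithmetic they share reduces to the following standalone sketch (the function name is illustrative):

    #include <cstdint>
    #include <vector>

    // Illustrative sketch only: the byte offsets touched when a vector of
    // NumElts elements, each MemEltBits wide in memory, is loaded or stored
    // one element at a time.
    std::vector<uint64_t> elementOffsets(unsigned NumElts, unsigned MemEltBits) {
      const uint64_t Increment = MemEltBits / 8; // bytes per element in memory
      std::vector<uint64_t> Offsets(NumElts);
      for (unsigned i = 0; i != NumElts; ++i)
        Offsets[i] = i * Increment;              // element i starts here
      return Offsets;
    }
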
+
+/// Modifies a vector input (widens or narrows it) to a vector of type NVT.
+/// The input vector must have the same element type as NVT.
+/// FillWithZeroes specifies that the vector should be widened with zeroes.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
+ bool FillWithZeroes) {
+ // Note that InOp might have been widened, so it might already have the
+ // right width or it might need to be narrowed.
+ EVT InVT = InOp.getValueType();
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+ SDLoc dl(InOp);
+
+ // Check if InOp already has the right width.
+ if (InVT == NVT)
+ return InOp;
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+ unsigned NumConcat = WidenNumElts / InNumElts;
+ SmallVector<SDValue, 16> Ops(NumConcat);
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
+ DAG.getUNDEF(InVT);
+ Ops[0] = InOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ Ops[i] = FillVal;
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
+ }
+
+ if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ // Fall back to extract and build.
+ SmallVector<SDValue, 16> Ops(WidenNumElts);
+ EVT EltVT = NVT.getVectorElementType();
+ unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+ unsigned Idx;
+ for (Idx = 0; Idx < MinNumElts; ++Idx)
+ Ops[Idx] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
+ for ( ; Idx < WidenNumElts; ++Idx)
+ Ops[Idx] = FillVal;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+}
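
ModifyToType's contract is easiest to see stripped of the DAG machinery: widening appends filler lanes (zero or undef) and narrowing keeps the leading lanes, with the element type unchanged. A plain-C++ restatement under those assumptions (the int elements and the function name are stand-ins):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Illustrative sketch only: resize a vector of lanes the way ModifyToType
    // does. Extra lanes get FillVal (zero here, standing in for zero/undef);
    // narrowing keeps only the leading lanes.
    std::vector<int> modifyToSize(const std::vector<int> &In, unsigned NumElts,
                                  int FillVal = 0) {
      std::vector<int> Out(NumElts, FillVal);
      std::size_t N = std::min<std::size_t>(NumElts, In.size());
      std::copy(In.begin(), In.begin() + N, Out.begin());
      return Out;
    }
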
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..1e5c4a73693f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,639 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// DFA is queried as a state machine to model "packets/bundles" during
+// schedule. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scheduler"
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<int> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
+ : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ TRI = STI.getRegisterInfo();
+ TLI = IS->TLI;
+ TII = STI.getInstrInfo();
+ ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end();
+ I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If value is passed to CopyToReg, it is probably
+ // live outside BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
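
The comparison chain above is easier to read restated over a plain struct: schedule-high first, then critical-path latency, then the number of nodes a unit solely blocks, and finally the node number as a stable tie-break. A standalone sketch under those assumptions (the field names are stand-ins for the SUnit queries):

    // Illustrative sketch only: the resource_sort tie-breaking chain over a
    // plain struct instead of SUnit.
    struct NodeKey {
      bool ScheduleHigh;
      unsigned Latency;        // critical-path latency
      unsigned SolelyBlocked;  // nodes this one solely blocks
      unsigned NodeNum;
    };

    // Returns true when L should rank below R, mirroring the comparisons in
    // resource_sort::operator().
    bool ranksBelow(const NodeKey &L, const NodeKey &R) {
      if (L.ScheduleHigh != R.ScheduleHigh)
        return !L.ScheduleHigh;                   // schedule-high nodes win
      if (L.Latency != R.Latency)
        return L.Latency < R.Latency;             // larger latency wins
      if (L.SolelyBlocked != R.SolelyBlocked)
        return L.SolelyBlocked < R.SolelyBlocked; // unblocking more nodes wins
      return L.NodeNum < R.NodeNum;               // stable final tie-break
    }
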
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = nullptr;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return nullptr;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now make sure there are no dependencies on instructions already
+ // in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
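
reserveResources models VLIW bundle formation: an instruction either joins the current packet or forces a new one, and a full packet resets the state for the next cycle. The sketch below replaces the DFA with a simple slot counter, an assumption made purely for illustration:

    #include <vector>

    // Illustrative sketch only: track a VLIW packet by instruction count
    // instead of querying a DFA. startNewPacket mirrors the
    // clearResources()/Packet.clear() pairs above.
    class SimplePacketModel {
      unsigned IssueWidth;
      std::vector<int> Packet; // opcodes currently bundled
    public:
      explicit SimplePacketModel(unsigned Width) : IssueWidth(Width) {}

      void startNewPacket() { Packet.clear(); }

      // Returns true if Opcode fit into the current packet, false if a new
      // packet had to be opened first.
      bool reserve(int Opcode) {
        bool Fit = Packet.size() < IssueWidth;
        if (!Fit)
          startNewPacket();
        Packet.push_back(Opcode);
        if (Packet.size() >= IssueWidth)
          startNewPacket(); // full packet: start fresh next cycle
        return Fit;
      }
    };
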
+
+int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ int RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ MVT VT = SU->getNode()->getSimpleValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates change in reg pressure from this SU.
+/// It is achieved by trivial tracking of defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore existing register file
+/// sizes and report the raw def/use balance.
+int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ int RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
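
In the non-raw branch, a register class only contributes its delta when the class would be at or beyond its pressure limit after the change; otherwise the change is treated as free. A standalone restatement of that gating (the per-class arrays are stand-ins for RegPressure/RegLimit):

    #include <cstddef>
    #include <vector>

    // Illustrative sketch only: accumulate per-class deltas, counting only the
    // classes whose pressure would reach or exceed their limit, mirroring the
    // non-raw branch of regPressureDelta.
    int limitedPressureDelta(const std::vector<int> &RawDelta,
                             const std::vector<int> &RegPressure,
                             const std::vector<int> &RegLimit) {
      int Balance = 0;
      for (std::size_t RC = 0; RC != RawDelta.size(); ++RC) {
        int After = RegPressure[RC] + RawDelta[RC];
        if (After > 0 && After >= RegLimit[RC])
          Balance += RawDelta[RC];
      }
      return Balance;
    }
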
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 50;
+static const unsigned PriorityThree = 15;
+static const unsigned PriorityFour = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns single number reflecting benefit of scheduling SU
+/// in the current cycle.
+int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ int ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+ // Adaptable scheduling:
+ // a small but very parallel region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+ // Default heuristic: greedy and critical path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+ // These are platform-specific things. They will need to go into the back
+ // end and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFour;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityThree;
+ break;
+ }
+ }
+ return ResCount;
+}
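
The default (non pressure-limited) branch of SUSchedulingCost boils down to a weighted sum using the constants defined above, plus a doubling when the unit fits into the current packet; the platform-specific call/copy adjustments are left out here. A standalone restatement under those assumptions (the inputs are stand-ins for the SUnit and DFA queries):

    // Illustrative sketch only: the default-branch cost from SUSchedulingCost
    // over plain integers, omitting the call/copy/inline-asm adjustments.
    int defaultSchedulingCost(int Height, int NumSolelyBlocking,
                              int PressureDelta, bool ResourcesFree,
                              bool ScheduleHigh) {
      const int PriorityOne = 200, ScaleTwo = 10, FactorOne = 2;
      int ResCount = 1;
      if (ScheduleHigh)
        ResCount += PriorityOne;                  // forced-high nodes first
      ResCount += Height * ScaleTwo;              // critical path first
      ResCount += NumSolelyBlocking * ScaleTwo;   // unblocks other nodes
      if (ResourcesFree)
        ResCount <<= FactorOne;                   // fits in the current packet
      ResCount -= PressureDelta * ScaleTwo;       // penalize pressure increase
      return ResCount;
    }
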
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use a null entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ MVT VT = ScegN->getSimpleValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+ // The heuristic is simple: a node with no data successors reduces the
+ // number of live ranges; all others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction to be placed in the
+/// scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return nullptr;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ int BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I) {
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != std::prev(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 000000000000..237d541b4cb9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,127 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
+
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/DataTypes.h"
+#include <utility>
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.
+
+class SDDbgValue {
+public:
+ enum DbgValueKind {
+ SDNODE = 0, // value is the result of an expression
+ CONST = 1, // value is a constant
+ FRAMEIX = 2 // value is contents of a stack location
+ };
+private:
+ union {
+ struct {
+ SDNode *Node; // valid for expressions
+ unsigned ResNo; // valid for expressions
+ } s;
+ const Value *Const; // valid for constants
+ unsigned FrameIx; // valid for stack objects
+ } u;
+ MDNode *Var;
+ MDNode *Expr;
+ uint64_t Offset;
+ DebugLoc DL;
+ unsigned Order;
+ enum DbgValueKind kind;
+ bool IsIndirect;
+ bool Invalid = false;
+
+public:
+ // Constructor for non-constants.
+ SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
+ uint64_t off, DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
+ IsIndirect(indir) {
+ kind = SDNODE;
+ u.s.Node = N;
+ u.s.ResNo = R;
+ }
+
+ // Constructor for constants.
+ SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
+ DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
+ IsIndirect(false) {
+ kind = CONST;
+ u.Const = C;
+ }
+
+ // Constructor for frame indices.
+ SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
+ unsigned O)
+ : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
+ IsIndirect(false) {
+ kind = FRAMEIX;
+ u.FrameIx = FI;
+ }
+
+ // Returns the kind.
+ DbgValueKind getKind() const { return kind; }
+
+ // Returns the MDNode pointer for the variable.
+ MDNode *getVariable() const { return Var; }
+
+ // Returns the MDNode pointer for the expression.
+ MDNode *getExpression() const { return Expr; }
+
+ // Returns the SDNode* for a register ref
+ SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
+
+ // Returns the ResNo for a register ref
+ unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
+
+ // Returns the Value* for a constant
+ const Value *getConst() const { assert (kind==CONST); return u.Const; }
+
+ // Returns the FrameIx for a stack object
+ unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
+
+ // Returns whether this is an indirect value.
+ bool isIndirect() const { return IsIndirect; }
+
+ // Returns the offset.
+ uint64_t getOffset() const { return Offset; }
+
+ // Returns the DebugLoc.
+ DebugLoc getDebugLoc() const { return DL; }
+
+ // Returns the SDNodeOrder. This is the order of the preceding node in the
+ // input.
+ unsigned getOrder() const { return Order; }
+
+ // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+ // property. A SDDbgValue is invalid if the SDNode that produces the value is
+ // deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() const { return Invalid; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
new file mode 100644
index 000000000000..62e7733ecd2b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -0,0 +1,805 @@
+//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a fast scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical copies");
+
+static RegisterScheduler
+ fastDAGScheduler("fast", "Fast suboptimal list scheduling",
+ createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
+
+namespace {
+ /// FastPriorityQueue - A degenerate priority queue that considers
+ /// all nodes to have the same priority.
+ ///
+ struct FastPriorityQueue {
+ SmallVector<SUnit *, 16> Queue;
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ Queue.push_back(U);
+ }
+
+ SUnit *pop() {
+ if (empty()) return nullptr;
+ SUnit *V = Queue.back();
+ Queue.pop_back();
+ return V;
+ }
+ };
+
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
+///
+class ScheduleDAGFast : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ FastPriorityQueue AvailableQueue;
+
+ /// LiveRegDefs - A set of physical registers and their definitions that
+ /// are "live". These nodes must be scheduled before any other nodes that
+ /// modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::vector<SUnit*> LiveRegDefs;
+ std::vector<unsigned> LiveRegCycles;
+
+public:
+ ScheduleDAGFast(MachineFunction &mf)
+ : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule() override;
+
+ /// AddPred - adds a predecessor edge to SUnit SU.
+ void AddPred(SUnit *SU, const SDep &D) {
+ SU->addPred(D);
+ }
+
+ /// RemovePred - removes a predecessor edge from SUnit SU.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ SU->removePred(D);
+ }
+
+private:
+ void ReleasePred(SUnit *SU, SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ScheduleNodeBottomUp(SUnit*, unsigned);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVectorImpl<SUnit*>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+ void ListScheduleBottomUp();
+
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const override { return true; }
+};
+} // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGFast::Schedule() {
+ DEBUG(dbgs() << "********** List Scheduling **********\n");
+
+ NumLiveRegs = 0;
+ LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
+ LiveRegCycles.resize(TRI->getNumRegs(), 0);
+
+ // Build the scheduling graph.
+ BuildSchedGraph(nullptr);
+
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+ AvailableQueue.push(PredSU);
+ }
+}
+
+void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ if (!LiveRegDefs[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ LiveRegCycles[I->getReg()] = CurCycle;
+ }
+ }
+ }
+}
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+ SU->setHeightToAtLeast(CurCycle);
+ Sequence.push_back(SU);
+
+ ReleasePredecessors(SU, CurCycle);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == SU &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = nullptr;
+ LiveRegCycles[I->getReg()] = 0;
+ }
+ }
+ }
+
+ SU->isScheduled = true;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
+ if (SU->getNode()->getGluedNode())
+ return nullptr;
+
+ SDNode *N = SU->getNode();
+ if (!N)
+ return nullptr;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue)
+ return nullptr;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (const SDValue &Op : N->op_values()) {
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return nullptr;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = newSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but has a different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = newSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+ }
+
+ SDep ChainPred;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainPred = *I;
+ else if (I->getSUnit()->getNode() &&
+ I->getSUnit()->getNode()->isOperandOf(LoadNode))
+ LoadPreds.push_back(*I);
+ else
+ NodePreds.push_back(*I);
+ }
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ ChainSuccs.push_back(*I);
+ else
+ NodeSuccs.push_back(*I);
+ }
+
+ if (ChainPred.getSUnit()) {
+ RemovePred(SU, ChainPred);
+ if (isNewLoad)
+ AddPred(LoadSU, ChainPred);
+ }
+ for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
+ const SDep &Pred = LoadPreds[i];
+ RemovePred(SU, Pred);
+ if (isNewLoad) {
+ AddPred(LoadSU, Pred);
+ }
+ }
+ for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
+ const SDep &Pred = NodePreds[i];
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
+ SDep D = NodeSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ }
+ for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
+ SDep D = ChainSuccs[i];
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+ if (isNewLoad) {
+ SDep D(LoadSU, SDep::Barrier);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+ }
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ NewSU = Clone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (!I->isArtificial())
+ AddPred(NewSU, *I);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVectorImpl<SUnit*> &Copies) {
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr));
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr));
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isArtificial())
+ continue;
+ SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = *I;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, *I));
+ }
+ }
+ for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+ RemovePred(DelDeps[i].first, DelDeps[i].second);
+ }
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ }
+ return N->getSimpleValueType(NumRes);
+}
+
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+ if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
+ if (RegAdded.insert(*AI).second) {
+ LRegs.push_back(*AI);
+ Added = true;
+ }
+ }
+ }
+ return Added;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
+ SmallVectorImpl<unsigned> &LRegs){
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep()) {
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
+ }
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
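+ // Inline asm operands come in groups: a flag word encoding the operand
+ // kind and the number of register operands that follow it.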
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+ if (!Node->isMachineOpcode())
+ continue;
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ }
+ return !LRegs.empty();
+}
+
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGFast::ListScheduleBottomUp() {
+ unsigned CurCycle = 0;
+
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU, CurCycle);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue.push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ SmallVector<SUnit*, 4> NotReady;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue.empty()) {
+ bool Delayed = false;
+ LRegsMap.clear();
+ SUnit *CurSU = AvailableQueue.pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ Delayed = true;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ NotReady.push_back(CurSU);
+ CurSU = AvailableQueue.pop();
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try code duplication or inserting cross class copies
+ // to resolve it.
+ if (Delayed && !CurSU) {
+ if (!CurSU) {
+ // Try duplicating the nodes that produce these
+ // "expensive to copy" values to break the dependency. In case even
+ // that doesn't work, insert cross class copies.
+ SUnit *TrySU = NotReady[0];
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it's
+ // possible to copy the value, but it requires cross register class
+ // copies and is expensive.
+ // If the cross copy register class is null, then it's not possible to
+ // copy the value at all.
+ SUnit *NewDef = nullptr;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical "
+ "register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ if (!CurSU) {
+ llvm_unreachable("Unable to resolve live physical register dependencies!");
+ }
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
+ NotReady[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (NotReady[i]->isAvailable)
+ AvailableQueue.push(NotReady[i]);
+ }
+ NotReady.clear();
+
+ if (CurSU)
+ ScheduleNodeBottomUp(CurSU, CurCycle);
+ ++CurCycle;
+ }
+
+ // Reverse the order since it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+
+namespace {
+//===----------------------------------------------------------------------===//
+ // ScheduleDAGLinearize - No-scheduling scheduler; it simply linearizes the
+ // DAG in topological order.
+ // IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule() override;
+
+ MachineBasicBlock *
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(nullptr);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = nullptr;
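+ // Walk the operands from last to first; a glue operand is always last, so
+ // it is visited first and scheduled immediately above N.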
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SDNode &Node : DAG->allnodes()) {
+ SDNode *N = &Node;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // The glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as users of the glue user.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
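+ // Transfer the glue node's remaining out-degree to its representative user
+ // and pin the glue node's own degree at 1, so it is only released (and
+ // scheduled) together with that user.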
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGFast(*IS->MF);
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
new file mode 100644
index 000000000000..802c459a0223
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -0,0 +1,3020 @@
+//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements bottom-up and top-down register pressure reduction list
+// schedulers, using standard algorithms. The basic approach uses a priority
+// queue of available nodes to schedule. One at a time, nodes are taken from
+// the priority queue (thus in priority order), checked for legality to
+// schedule, and emitted if legal.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+ burrListDAGScheduler("list-burr",
+ "Bottom-up register reduction list scheduling",
+ createBURRListDAGScheduler);
+static RegisterScheduler
+ sourceListDAGScheduler("source",
+ "Similar to list-burr but schedules in source "
+ "order when possible",
+ createSourceListDAGScheduler);
+
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
+ createHybridListDAGScheduler);
+
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+// Some options are also available under sched=list-hybrid.
+static cl::opt<bool> DisableSchedRegPressure(
+ "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+ cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+ "disable-sched-live-uses", cl::Hidden, cl::init(true),
+ cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedVRegCycle(
+ "disable-sched-vrcycle", cl::Hidden, cl::init(false),
+ cl::desc("Disable virtual register cycle interference checks"));
+static cl::opt<bool> DisableSchedPhysRegJoin(
+ "disable-sched-physreg-join", cl::Hidden, cl::init(false),
+ cl::desc("Disable physreg def-use affinity"));
+static cl::opt<bool> DisableSchedStalls(
+ "disable-sched-stalls", cl::Hidden, cl::init(true),
+ cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+ "disable-sched-critical-path", cl::Hidden, cl::init(false),
+ cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+ "disable-sched-height", cl::Hidden, cl::init(false),
+ cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
+
+static cl::opt<int> MaxReorderWindow(
+ "max-sched-reorder", cl::Hidden, cl::init(6),
+ cl::desc("Number of instructions to allow ahead of the critical path "
+ "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+ "sched-avg-ipc", cl::Hidden, cl::init(1),
+ cl::desc("Average inst/cycle whan no target itinerary exists."));
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
+ /// IssueCount - Count of instructions issued in this cycle.
+ /// Currently valid only for bottom-up scheduling.
+ unsigned IssueCount;
+
+ /// LiveRegDefs - A set of physical registers and their definitions
+ /// that are "live". These nodes must be scheduled before any other nodes that
+ /// modify the registers can be scheduled.
+ unsigned NumLiveRegs;
+ std::unique_ptr<SUnit*[]> LiveRegDefs;
+ std::unique_ptr<SUnit*[]> LiveRegGens;
+
+ // Collect interferences between physical register use/defs.
+ // Each interference is an SUnit and set of physical registers.
+ SmallVector<SUnit*, 4> Interferences;
+ typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+ LRegsMapT LRegsMap;
+
+ /// Topo - A topological ordering for SUnits which permits fast IsReachable
+ /// and similar queries.
+ ScheduleDAGTopologicalSort Topo;
+
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
+public:
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits, nullptr) {
+
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+ }
+
+ ~ScheduleDAGRRList() override {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule() override;
+
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+ /// IsReachable - Checks if SU is reachable from TargetSU.
+ bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+ return Topo.IsReachable(SU, TargetSU);
+ }
+
+ /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+ /// create a cycle.
+ bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+ return Topo.WillCreateCycle(SU, TargetSU);
+ }
+
+ /// AddPred - Adds a predecessor edge to SUnit SU.
+ /// Updates the topological ordering if required.
+ void AddPred(SUnit *SU, const SDep &D) {
+ Topo.AddPred(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
+ /// RemovePred - Removes a predecessor edge from SUnit SU.
+ /// Updates the topological ordering if required.
+ void RemovePred(SUnit *SU, const SDep &D) {
+ Topo.RemovePred(SU, D.getSUnit());
+ SU->removePred(D);
+ }
+
+private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
+ void ReleasePred(SUnit *SU, const SDep *PredEdge);
+ void ReleasePredecessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
+ void CapturePred(SDep *PredEdge);
+ void UnscheduleNodeBottomUp(SUnit*);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *CopyAndMoveSuccessors(SUnit*);
+ void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+ const TargetRegisterClass*,
+ const TargetRegisterClass*,
+ SmallVectorImpl<SUnit*>&);
+ bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
+
+ void releaseInterferences(unsigned Reg = 0);
+
+ SUnit *PickNodeToScheduleBottomUp();
+ void ListScheduleBottomUp();
+
+ /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+ /// Updates the topological ordering if required.
+ SUnit *CreateNewSUnit(SDNode *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = newSUnit(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// CreateClone - Creates a new SUnit from an existing one.
+ /// Updates the topological ordering if required.
+ SUnit *CreateClone(SUnit *N) {
+ unsigned NumSUnits = SUnits.size();
+ SUnit *NewNode = Clone(N);
+ // Update the topological ordering.
+ if (NewNode->NodeNum >= NumSUnits)
+ Topo.InitDAGTopologicalSorting();
+ return NewNode;
+ }
+
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information but the hybrid scheduler does.
+ bool forceUnitLatencies() const override {
+ return !NeedLatency;
+ }
+};
+} // end anonymous namespace
+
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost,
+ const MachineFunction &MF) {
+ MVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::Untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+
+ // Special handling for CopyFromReg of untyped values.
+ if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Opcode = Node->getMachineOpcode();
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
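+ // REG_SEQUENCE carries its destination register class ID as a constant
+ // first operand.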
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ CurCycle = 0;
+ IssueCount = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ NumLiveRegs = 0;
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
+ CallSeqEndForStart.clear();
+ assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(nullptr);
+
+ DEBUG(for (SUnit &SU : SUnits)
+ SU.dumpAll(this));
+ Topo.InitDAGTopologicalSorting();
+
+ AvailableQueue->initNodes(SUnits);
+
+ HazardRec->Reset();
+
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
+
+ AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+//===----------------------------------------------------------------------===//
+// Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+
+ if (!forceUnitLatencies()) {
+ // Update the predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
+ // If all the node's successors are scheduled, this node is ready
+ // to be scheduled. Ignore the special EntrySU node.
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
+ PredSU->isAvailable = true;
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(PredSU)) {
+ AvailableQueue->push(PredSU);
+ }
+ // CapturePred and others may have left the node in the pending queue, avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
+ }
+}
+
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (const SDValue &Op : N->op_values())
+ if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other) {
+ N = Op.getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+ /// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = nullptr;
+ unsigned BestMaxNest = MaxNest;
+ for (const SDValue &Op : N->op_values()) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(Op.getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other) {
+ N = Op.getNode();
+ goto found_chain_operand;
+ }
+ return nullptr;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return nullptr;
+ }
+}
+
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+ /// also defines the register. This effectively creates one large live range
+ /// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
+ // Bottom up: release predecessors
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ ReleasePred(SU, &*I);
+ if (I->isAssignedRegDep()) {
+ // This is a physical register dependency and it's impossible or
+ // expensive to copy the register. Make sure nothing that can
+ // clobber the register is scheduled between the predecessor and
+ // this node.
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
+ ++NumLiveRegs;
+ LiveRegGens[I->getReg()] = SU;
+ }
+ }
+ }
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
+}
+
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
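+ // Erase this entry in O(1) by swapping in the last element, then step the
+ // index back so the swapped-in entry is also examined.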
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Move the scheduler state forward by the specified number of Cycles.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ IssueCount = 0;
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ HazardRec->RecedeCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ // FIXME: Nodes such as CopyFromReg probably should not advance the current
+ // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
+ // has predecessors the cycle will be advanced when they are scheduled.
+ // But given the crude nature of modeling latency through such nodes, we
+ // currently need to treat these nodes like real instructions.
+ // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
+
+ unsigned ReadyCycle = SU->getHeight();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, -Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+}
+
+static void resetVRegCycle(SUnit *SU);
+
+/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
+/// count of its predecessors. If a predecessor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight()
+ << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
+ SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
+ Sequence.push_back(SU);
+
+ AvailableQueue->scheduledNode(SU);
+
+ // If HazardRec is disabled, and each inst counts as one cycle, then
+ // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+ // PendingQueue for schedulers that implement HasReadyFilter.
+ if (!HazardRec->isEnabled() && AvgIPC < 2)
+ AdvanceToCycle(CurCycle + 1);
+
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
+
+ // Release all the implicit physical register defs that are live.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = nullptr;
+ LiveRegGens[I->getReg()] = nullptr;
+ releaseInterferences(I->getReg());
+ }
+ }
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ resetVRegCycle(SU);
+
+ SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, the cycle was pre-advanced before calling
+ // ReleasePredecessors. In that case, IssueCount should remain 0.
+ //
+ // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+ if (HazardRec->isEnabled() || AvgIPC > 1) {
+ if (SU->getNode() && SU->getNode()->isMachineOpcode())
+ ++IssueCount;
+ if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+ || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+ AdvanceToCycle(CurCycle + 1);
+ }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+ /// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+ if (PredSU->isAvailable) {
+ PredSU->isAvailable = false;
+ if (!PredSU->isPending)
+ AvailableQueue->remove(PredSU);
+ }
+
+ assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ ++PredSU->NumSuccsLeft;
+}
+
+ /// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+ /// state and the states of its predecessors to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+ DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+ DEBUG(SU->dump(this));
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ CapturePred(&*I);
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+ "Physical register dependency violated?");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = nullptr;
+ LiveRegGens[I->getReg()] = nullptr;
+ releaseInterferences(I->getReg());
+ }
+ }
+
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
+ releaseInterferences(CallResource);
+ }
+ }
+
+ for (auto &Succ : SU->Succs) {
+ if (Succ.isAssignedRegDep()) {
+ auto Reg = Succ.getReg();
+ if (!LiveRegDefs[Reg])
+ ++NumLiveRegs;
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[Reg] = SU;
+
+ // Update LiveRegGens only if it was empty before this unscheduling.
+ // This avoids incorrectly overwriting an entry set by a previous run.
+ if (!LiveRegGens[Reg]) {
+ // Find the successor with the lowest height.
+ LiveRegGens[Reg] = Succ.getSUnit();
+ for (auto &Succ2 : SU->Succs) {
+ if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
+ Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
+ LiveRegGens[Reg] = Succ2.getSUnit();
+ }
+ }
+ }
+ }
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
+
+ SU->setHeightDirty();
+ SU->isScheduled = false;
+ SU->isAvailable = true;
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
+ AvailableQueue->unscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+ /// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
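+ // Re-emit the most recently scheduled nodes (up to the recognizer's
+ // lookahead) so the hazard state again reflects the current cycle.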
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
+ /// BacktrackBottomUp - Unschedule nodes in bottom-up order back to (and
+ /// including) BtSU, so that SU can be scheduled.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
+ Sequence.pop_back();
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
+ UnscheduleNodeBottomUp(OldSU);
+ AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
+ }
+
+ assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
+ ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isOperandOf(N))
+ return true;
+ }
+ return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ if (!N)
+ return nullptr;
+
+ if (SU->getNode()->getGluedNode())
+ return nullptr;
+
+ SUnit *NewSU;
+ bool TryUnfold = false;
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue)
+ return nullptr;
+ else if (VT == MVT::Other)
+ TryUnfold = true;
+ }
+ for (const SDValue &Op : N->op_values()) {
+ MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
+ if (VT == MVT::Glue)
+ return nullptr;
+ }
+
+ if (TryUnfold) {
+ SmallVector<SDNode*, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
+ // which can't be handled here, so bail out.
+ if (NewNodes.size() == 3)
+ return nullptr;
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
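+ // Redirect all uses of the folded node's values to the unfolded node, and
+ // hook the old node's chain result up to the new load's chain.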
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
+ SDValue(LoadNode, 1));
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location that produces the same type of value
+ // but has different alignment or volatility.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (const SDep &Pred : ChainPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : LoadPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : NodePreds) {
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (SDep D : NodeSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (SDep D : ChainSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0) {
+ NewSU->isAvailable = true;
+ return NewSU;
+ }
+ SU = NewSU;
+ }
+
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
+ NewSU = CreateClone(SU);
+
+ // New SUnit has the exact same predecessors.
+ for (SDep &Pred : SU->Preds)
+ if (!Pred.isArtificial())
+ AddPred(NewSU, Pred);
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(NewSU);
+ AddPred(SuccSU, D);
+ D.setSUnit(SU);
+ DelDeps.push_back(std::make_pair(SuccSU, D));
+ }
+ }
+ for (auto &DelDep : DelDeps)
+ RemovePred(DelDep.first, DelDep.second);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumDups;
+ return NewSU;
+}
+
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC,
+ SmallVectorImpl<SUnit*> &Copies) {
+ SUnit *CopyFromSU = CreateNewSUnit(nullptr);
+ CopyFromSU->CopySrcRC = SrcRC;
+ CopyFromSU->CopyDstRC = DestRC;
+
+ SUnit *CopyToSU = CreateNewSUnit(nullptr);
+ CopyToSU->CopySrcRC = DestRC;
+ CopyToSU->CopyDstRC = SrcRC;
+
+ // Only copy scheduled successors. Cut them from old node's successor
+ // list and move them over.
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isArtificial())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->isScheduled) {
+ SDep D = Succ;
+ D.setSUnit(CopyToSU);
+ AddPred(SuccSU, D);
+ DelDeps.push_back(std::make_pair(SuccSU, Succ));
+ }
+ else {
+ // Avoid scheduling the def-side copy before other successors. Otherwise
+ // we could introduce another physreg interference on the copy and
+ // continue inserting copies indefinitely.
+ AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ }
+ }
+ for (auto &DelDep : DelDeps)
+ RemovePred(DelDep.first, DelDep.second);
+
+ SDep FromDep(SU, SDep::Data, Reg);
+ FromDep.setLatency(SU->Latency);
+ AddPred(CopyFromSU, FromDep);
+ SDep ToDep(CopyFromSU, SDep::Data, 0);
+ ToDep.setLatency(CopyFromSU->Latency);
+ AddPred(CopyToSU, ToDep);
+
+ AvailableQueue->updateNode(SU);
+ AvailableQueue->addNode(CopyFromSU);
+ AvailableQueue->addNode(CopyToSU);
+ Copies.push_back(CopyFromSU);
+ Copies.push_back(CopyToSU);
+
+ ++NumPRCopies;
+}
+
+/// getPhysicalRegisterVT - Returns the ValueType of the physical register
+/// definition of the specified node.
+/// FIXME: Move to SelectionDAG?
+static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
+ const TargetInstrInfo *TII) {
+ unsigned NumRes;
+ if (N->getOpcode() == ISD::CopyFromReg) {
+ // CopyFromReg has: "chain, Val, glue" so result 1 gives the type.
+ NumRes = 1;
+ } else {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ NumRes = MCID.getNumDefs();
+ for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ if (Reg == *ImpDef)
+ break;
+ ++NumRes;
+ }
+ }
+ return N->getSimpleValueType(NumRes);
+}
+
+ /// CheckForLiveRegDef - Update the live register vector (LRegs) if the
+ /// specified register def of the specified SUnit clobbers any "live" registers.
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ SUnit **LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
+
+ // Check if Reg (or an alias of it) is live.
+ if (!LiveRegDefs[*AliasI]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AliasI] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AliasI).second) {
+ LRegs.push_back(*AliasI);
+ }
+ }
+}
+
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ ArrayRef<SUnit*> LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVectorImpl<unsigned> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i).second)
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (const SDValue &Op : N->op_values())
+ if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
+ return RegOp->getRegMask();
+ return nullptr;
+}
+
+/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
+/// scheduling of the given node to satisfy live physical register dependencies.
+/// If the specific node is the last one that's available to schedule, do
+/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
+ if (NumLiveRegs == 0)
+ return false;
+
+ SmallSet<unsigned, 4> RegAdded;
+ // If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(),
+ RegAdded, LRegs, TRI);
+ }
+
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
+
+ if (!Node->isMachineOpcode())
+ continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) &&
+ RegAdded.insert(CallResource).second)
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask,
+ makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
+ RegAdded, LRegs);
+
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
+ continue;
+ for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+
+ return !LRegs.empty();
+}
+
+void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = Interferences.size(); i > 0; --i) {
+ SUnit *SU = Interferences[i-1];
+ LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
+ if (Reg) {
+ SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
+ if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ continue;
+ }
+ SU->isPending = false;
+ // The interfering node may no longer be available due to backtracking.
+ // Furthermore, it may have been made available again, in which case it is
+ // now already in the AvailableQueue.
+ if (SU->isAvailable && !SU->NodeQueueId) {
+ DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
+ AvailableQueue->push(SU);
+ }
+ if (i < Interferences.size())
+ Interferences[i-1] = Interferences.back();
+ Interferences.pop_back();
+ LRegsMap.erase(LRegsPos);
+ }
+}
+
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ DEBUG(dbgs() << " Interfering reg " <<
+ (LRegs[0] == TRI->getNumRegs() ? "CallResource"
+ : TRI->getName(LRegs[0]))
+ << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU)
+ return CurSU;
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (SUnit *TrySU : Interferences) {
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = nullptr;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned Reg : LRegs) {
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
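+ // BtSU is the LiveRegGens entry with the smallest height among the
+ // interfering registers; backtrack to it unless the artificial edge added
+ // below would create a cycle.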
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ // BacktrackBottomUp mutates Interferences!
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
+ << TrySU->NodeNum << ")\n");
+ AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available.
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId)
+ CurSU = AvailableQueue->pop();
+ else {
+ // Available and in AvailableQueue
+ AvailableQueue->remove(TrySU);
+ CurSU = TrySU;
+ }
+ // Interferences has been mutated. We must break.
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try
+ // duplicating the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross copy register class is the same as RC, then it must be
+ // possible to copy the value directly. Do not try to duplicate the def.
+ // If the cross copy register class is not the same as RC, then it is
+ // possible to copy the value, but it requires cross register class copies
+ // and is expensive.
+ // If the cross copy register class is null, then it's not possible to copy
+ // the value at all.
+ SUnit *NewDef = nullptr;
+ if (DestRC != RC) {
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ if (!DestRC && !NewDef)
+ report_fatal_error("Can't handle live physical register dependency!");
+ }
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+ return CurSU;
+}
+
+/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
+/// schedulers.
+void ScheduleDAGRRList::ListScheduleBottomUp() {
+ // Release any predecessors of the special Exit node.
+ ReleasePredecessors(&ExitSU);
+
+ // Add root to Available queue.
+ if (!SUnits.empty()) {
+ SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
+ assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
+ RootSU->isAvailable = true;
+ AvailableQueue->push(RootSU);
+ }
+
+ // While Available queue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !Interferences.empty()) {
+ DEBUG(dbgs() << "\nExamining Available:\n";
+ AvailableQueue->dump(this));
+
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
+
+ AdvancePastStalls(SU);
+
+ ScheduleNodeBottomUp(SU);
+
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
+ }
+ }
+
+ // Reverse the order if it is bottom up.
+ std::reverse(Sequence.begin(), Sequence.end());
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/true);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
+namespace {
+class RegReductionPQBase;
+
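+/// queue_sort - Common base for the priority comparators below; it provides
+/// a default isReady that accepts every node.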
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+/// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
+ };
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+ bool SrcOrder;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits) override;
+
+ void addNode(const SUnit *SU) override;
+
+ void updateNode(const SUnit *SU) override;
+
+ void releaseState() override {
+ SUnits = nullptr;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ if (!SU->getNode()) return 0;
+
+ return SU->getNode()->getIROrder();
+ }
+
+ bool empty() const override { return Queue.empty(); }
+
+ void push(SUnit *U) override {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) override {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != std::prev(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const override { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU) const;
+
+ int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
+
+ void scheduledNode(SUnit *SU) override;
+
+ void unscheduledNode(SUnit *SU) override;
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
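+ // Linear scan for the best candidate according to Picker, then remove it
+ // from the vector with a swap-and-pop so no elements need to shift.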
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != std::prev(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
+ }
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ bool srcorder,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const override { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const override {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() override {
+ if (Queue.empty()) return nullptr;
+
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump(ScheduleDAG *DAG) const override {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ SU->dump(DAG);
+ }
+ }
+#endif
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+// Check for special nodes that bypass scheduling heuristics.
+// Currently this pushes TokenFactor nodes down, but may be used for other
+// pseudo-ops as well.
+//
+// Return -1 to schedule right above left, 1 for left above right.
+// Return 0 if no bias exists.
+static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
+ bool LSchedLow = left->isScheduleLow;
+ bool RSchedLow = right->isScheduleLow;
+ if (LSchedLow != RSchedLow)
+ return LSchedLow < RSchedLow ? 1 : -1;
+ return 0;
+}
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
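+/// For example, a leaf node gets 1; a node whose data operands have numbers
+/// 2 and 1 keeps 2; a node whose operands both have number 1 gets 2, since
+/// each tie at the running maximum adds one.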
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+ unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+ if (SethiUllmanNumber != 0)
+ return SethiUllmanNumber;
+
+ unsigned Extra = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+ if (PredSethiUllman > SethiUllmanNumber) {
+ SethiUllmanNumber = PredSethiUllman;
+ Extra = 0;
+ } else if (PredSethiUllman == SethiUllmanNumber)
+ ++Extra;
+ }
+
+ SethiUllmanNumber += Extra;
+
+ if (SethiUllmanNumber == 0)
+ SethiUllmanNumber = 1;
+
+ return SethiUllmanNumber;
+}
+
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+ for (const SUnit &SU : *SUnits)
+ CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors, so that it doesn't lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+#if 1
+ return SethiUllmanNumbers[SU->NodeNum];
+#else
+ unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+ if (SU->isCallOp) {
+ // FIXME: This assumes all of the defs are used as call operands.
+ int NP = (int)Priority - SU->getNode()->getNumValues();
+ return (NP > 0) ? NP : 0;
+ }
+ return Priority;
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
+ << RegLimit[Id] << '\n');
+ }
+#endif
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+ const SDNode *N = SU->getNode();
+
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+// Compute the register pressure contribution by this instruction by counting up
+// for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// so could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+ LiveUses = 0;
+ int PDiff = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ if (PredSU->getNode()->isMachineOpcode())
+ ++LiveUses;
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ MVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ ++PDiff;
+ }
+ }
+ const SDNode *N = SU->getNode();
+
+ if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+ return PDiff;
+
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ --PDiff;
+ }
+ return PDiff;
+}
+
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ if (!SU->getNode())
+ return;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ RegPressure[RCId] += Cost;
+ break;
+ }
+ }
+
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
+ if (RegPressure[RCId] < Cost) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= Cost;
+ }
+ }
+ dumpRegPressure();
+}
+
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N) return;
+
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ continue;
+ SUnit *PredSU = Pred.getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ MVT VT = PN->getSimpleValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ MVT VT = PN->getSimpleValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+}
+
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// closestSucc - Returns the scheduled cycle of the successor which is
+/// closest to the current cycle.
+static unsigned closestSucc(const SUnit *SU) {
+ unsigned MaxHeight = 0;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue; // ignore chain succs
+ unsigned Height = Succ.getSUnit()->getHeight();
+ // If there are a bunch of CopyToRegs stacked up, they should be considered
+ // to be at the same position.
+ if (Succ.getSUnit()->getNode() &&
+ Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+ Height = closestSucc(Succ.getSUnit())+1;
+ if (Height > MaxHeight)
+ MaxHeight = Height;
+ }
+ return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. the number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+ unsigned Scratches = 0;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ Scratches++;
+ }
+ return Scratches;
+}
+
+/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
+/// CopyFromReg from a virtual register.
+static bool hasOnlyLiveInOpers(const SUnit *SU) {
+ bool RetVal = false;
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ const SUnit *PredSU = Pred.getSUnit();
+ if (PredSU->getNode() &&
+ PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (const SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl()) continue;
+ const SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+// Set isVRegCycle for a node with only live in opers and live out uses. Also
+// set isVRegCycle for its CopyFromReg operands.
+//
+// This is only relevant for single-block loops, in which case the VRegCycle
+// node is likely an induction variable in which the operand and target virtual
+// registers should be coalesced (e.g. pre/post increment values). Setting the
+// isVRegCycle flag helps the scheduler prioritize other uses of the same
+// CopyFromReg so that this node becomes the virtual register "kill". This
+// avoids interference between the values live in and out of the block and
+// eliminates a copy inside the loop.
+static void initVRegCycle(SUnit *SU) {
+ if (DisableSchedVRegCycle)
+ return;
+
+ if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
+ return;
+
+ DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
+
+ SU->isVRegCycle = true;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue;
+ Pred.getSUnit()->isVRegCycle = true;
+ }
+}
+
+// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
+// CopyFromReg operands. We should no longer penalize other uses of this VReg.
+static void resetVRegCycle(SUnit *SU) {
+ if (!SU->isVRegCycle)
+ return;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ SUnit *PredSU = Pred.getSUnit();
+ if (PredSU->isVRegCycle) {
+ assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
+ "VRegCycle def must be CopyFromReg");
+ Pred.getSUnit()->isVRegCycle = false;
+ }
+ }
+}
+
+// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
+// means a node that defines the VRegCycle has not been scheduled yet.
+static bool hasVRegCycleUse(const SUnit *SU) {
+ // If this SU also defines the VReg, don't hoist it as a "use".
+ if (SU->isVRegCycle)
+ return false;
+
+ for (const SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl()) continue; // ignore chain preds
+ if (Pred.getSUnit()->isVRegCycle &&
+ Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
+ DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
+ return true;
+ }
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // Scheduling an instruction that uses a VReg whose postincrement has not yet
+ // been scheduled will induce a copy. Model this as an extra cycle of latency.
+ int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
+ int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
+ int LHeight = (int)left->getHeight() + LPenalty;
+ int RHeight = (int)right->getHeight() + RPenalty;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling both nodes will cause pipeline stalls, sort them
+ // according to their height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
+ // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
+ // is enabled, grouping instructions by cycle, then their heights are already
+ // covered so only their depths matter. We also reach this point if both stall
+ // but have the same height.
+ if (!SPQ->getHazardRec()->isEnabled()) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ int LDepth = left->getDepth() - LPenalty;
+ int RDepth = right->getDepth() - RPenalty;
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
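+ // Tie-break cascade, in order: physreg-def proximity, Sethi-Ullman
+ // priority, source order when calls are involved, distance to the closest
+ // successor, scratch register count, latency (or height/depth), and
+ // finally queue order.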
+ // Schedule physical register definitions close to their use. This is
+ // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
+ // long as shortening physreg live ranges is generally good, we can defer
+ // creating a subtarget hook.
+ if (!DisableSchedPhysRegJoin) {
+ bool LHasPhysReg = left->hasPhysRegDefs;
+ bool RHasPhysReg = right->hasPhysRegDefs;
+ if (LHasPhysReg != RHasPhysReg) {
+ #ifndef NDEBUG
+ static const char *const PhysRegMsg[] = { " has no physreg",
+ " defines a physreg" };
+ #endif
+ DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
+ << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
+ << PhysRegMsg[RHasPhysReg] << "\n");
+ return LHasPhysReg < RHasPhysReg;
+ }
+ }
+
+ // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
+ unsigned LPriority = SPQ->getNodePriority(left);
+ unsigned RPriority = SPQ->getNodePriority(right);
+
+ // Be really careful about hoisting call operands above previous calls.
+ // Only allow it if it would reduce register pressure.
+ if (left->isCall && right->isCallOp) {
+ unsigned RNumVals = right->getNode()->getNumValues();
+ RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+ }
+ if (right->isCall && left->isCallOp) {
+ unsigned LNumVals = left->getNode()->getNumValues();
+ LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+ }
+
+ if (LPriority != RPriority)
+ return LPriority > RPriority;
+
+ // If one or both of the nodes are calls and their Sethi-Ullman numbers are
+ // the same, then keep source order.
+ if (left->isCall || right->isCall) {
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+ }
+
+ // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+ // e.g.
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // and the following instructions are both ready.
+ // t2 = op c3
+ // t4 = op c4
+ //
+ // Then schedule t2 = op first.
+ // i.e.
+ // t4 = op c4
+ // t2 = op c3
+ // t1 = op t2, c1
+ // t3 = op t4, c2
+ //
+ // This creates more short live intervals.
+ unsigned LDist = closestSucc(left);
+ unsigned RDist = closestSucc(right);
+ if (LDist != RDist)
+ return LDist < RDist;
+
+ // How many registers become live when the node is scheduled.
+ unsigned LScratch = calcMaxScratches(left);
+ unsigned RScratch = calcMaxScratches(right);
+ if (LScratch != RScratch)
+ return LScratch > RScratch;
+
+ // Comparing latency against a call makes little sense unless the node
+ // is register pressure-neutral.
+ if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+ return (left->NodeQueueId > right->NodeQueueId);
+
+ // Do not compare latencies when one or both of the nodes are calls.
+ if (!DisableSchedCycles &&
+ !(left->isCall || right->isCall)) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
+ "NodeQueueId cannot be zero");
+ return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ unsigned LOrder = SPQ->getNodeOrdering(left);
+ unsigned ROrder = SPQ->getNodeOrdering(right);
+
+ // Prefer an ordering where the lower the non-zero order number, the higher
+ // the preference.
+ if ((LOrder || ROrder) && LOrder != ROrder)
+ return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+ return BURRSort(left, right, SPQ);
+}
+
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
+ return true;
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
+ return false;
+ }
+ if (!LHigh && !RHigh) {
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ return BURRSort(left, right, SPQ);
+}
+
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+static bool canEnableCoalescing(SUnit *SU) {
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return true;
+
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return true;
+
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return true;
+
+ return false;
+}
+
+// list-ilp is currently an experimental scheduler that allows various
+// heuristics to be enabled prior to the normal register reduction logic.
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
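+ // Heuristic order: special nodes, calls, register pressure difference,
+ // coalescing opportunity, live-use count, stalls, critical path (depth),
+ // height, then the normal BURRSort tie-breakers.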
+ if (int res = checkSpecialNodes(left, right))
+ return res > 0;
+
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
+ unsigned LLiveUses = 0, RLiveUses = 0;
+ int LPDiff = 0, RPDiff = 0;
+ if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
+ LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
+ RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
+ }
+ if (!DisableSchedRegPressure && LPDiff != RPDiff) {
+ DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
+ << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
+ return LPDiff > RPDiff;
+ }
+
+ if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
+ bool LReduce = canEnableCoalescing(left);
+ bool RReduce = canEnableCoalescing(right);
+ if (LReduce && !RReduce) return false;
+ if (RReduce && !LReduce) return true;
+ }
+
+ if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
+ DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
+ << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
+ return LLiveUses < RLiveUses;
+ }
+
+ if (!DisableSchedStalls) {
+ bool LStall = BUHasStall(left, left->getHeight(), SPQ);
+ bool RStall = BUHasStall(right, right->getHeight(), SPQ);
+ if (LStall != RStall)
+ return left->getHeight() > right->getHeight();
+ }
+
+ if (!DisableSchedCriticalPath) {
+ int spread = (int)left->getDepth() - (int)right->getDepth();
+ if (std::abs(spread) > MaxReorderWindow) {
+ DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
+ << left->getDepth() << " != SU(" << right->NodeNum << "): "
+ << right->getDepth() << "\n");
+ return left->getDepth() < right->getDepth();
+ }
+ }
+
+ if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
+ int spread = (int)left->getHeight() - (int)right->getHeight();
+ if (std::abs(spread) > MaxReorderWindow)
+ return left->getHeight() > right->getHeight();
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure && !SrcOrder)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+
+ // For single block loops, mark nodes that look like canonical IV increments.
+ if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
+ for (SUnit &SU : sunits)
+ initVRegCycle(&SU);
+}
+
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
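+/// canClobber - Return true if SU is a two-address node with a tied
+/// (def&use) operand that is produced by Op (or by the node Op was cloned
+/// from).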
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
+ if (SU->isTwoAddress) {
+ unsigned Opc = SU->getNode()->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
+ SDNode *DU = SU->getNode()->getOperand(i).getNode();
+ if (DU->getNodeId() != -1 &&
+ Op->OrigNode == &(*SUnits)[DU->getNodeId()])
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// canClobberReachingPhysRegUse - True if SU would clobber one of its
+/// successors' explicit physregs whose definition can reach DepSU.
+/// i.e. DepSU should not be scheduled above SU.
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
+ ScheduleDAGRRList *scheduleDAG,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ const MCPhysReg *ImpDefs
+ = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if (!ImpDefs && !RegMask)
+ return false;
+
+ for (const SDep &Succ : SU->Succs) {
+ SUnit *SuccSU = Succ.getSUnit();
+ for (const SDep &SuccPred : SuccSU->Preds) {
+ if (!SuccPred.isAssignedRegDep())
+ continue;
+
+ if (RegMask &&
+ MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
+ scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
+/// physical register defs.
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ SDNode *N = SuccSU->getNode();
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ assert(ImpDefs && "Caller should check hasPhysRegDefs");
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (!SUNode->isMachineOpcode())
+ continue;
+ const MCPhysReg *SUImpDefs =
+ TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ MVT VT = N->getSimpleValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
+ for (;*SUImpDefs; ++SUImpDefs) {
+ unsigned SUReg = *SUImpDefs;
+ if (TRI->regsOverlap(Reg, SUReg))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+/// N
+/// / |
+/// / |
+/// U store
+/// |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+/// N
+/// ||
+/// ||
+/// store
+/// |
+/// U
+/// |
+/// ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+ // Visit all the nodes in topological order, working top-down.
+ for (SUnit &SU : *SUnits) {
+ // For now, only look at nodes with no data successors, such as stores.
+ // These are especially important, due to the heuristics in
+ // getNodePriority for nodes with no data successors.
+ if (SU.NumSuccs != 0)
+ continue;
+ // For now, only look at nodes with exactly one data predecessor.
+ if (SU.NumPreds != 1)
+ continue;
+ // Avoid prescheduling copies to virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU.getNode())
+ if (N->getOpcode() == ISD::CopyToReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Locate the single data predecessor.
+ SUnit *PredSU = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (!Pred.isCtrl()) {
+ PredSU = Pred.getSUnit();
+ break;
+ }
+ assert(PredSU);
+
+ // Don't rewrite edges that carry physregs, because that requires additional
+ // support infrastructure.
+ if (PredSU->hasPhysRegDefs)
+ continue;
+ // Short-circuit the case where SU is PredSU's only data successor.
+ if (PredSU->NumSuccs == 1)
+ continue;
+ // Avoid prescheduling to copies from virtual registers, which don't behave
+ // like other nodes from the perspective of scheduling heuristics.
+ if (SDNode *N = SU.getNode())
+ if (N->getOpcode() == ISD::CopyFromReg &&
+ TargetRegisterInfo::isVirtualRegister
+ (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ continue;
+
+ // Perform checks on the successors of PredSU.
+ for (const SDep &PredSucc : PredSU->Succs) {
+ SUnit *PredSuccSU = PredSucc.getSUnit();
+ if (PredSuccSU == &SU) continue;
+ // If PredSU has another successor with no data successors, for
+ // now don't attempt to choose either over the other.
+ if (PredSuccSU->NumSuccs == 0)
+ goto outer_loop_continue;
+ // Don't break physical register dependencies.
+ if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+ if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
+ goto outer_loop_continue;
+ // Don't introduce graph cycles.
+ if (scheduleDAG->IsReachable(&SU, PredSuccSU))
+ goto outer_loop_continue;
+ }
+
+ // Ok, the transformation is safe and the heuristics suggest it is
+ // profitable. Update the graph.
+ DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
+ << " to guide scheduling in the presence of multiple uses\n");
+ for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+ SDep Edge = PredSU->Succs[i];
+ assert(!Edge.isAssignedRegDep());
+ SUnit *SuccSU = Edge.getSUnit();
+ if (SuccSU != &SU) {
+ Edge.setSUnit(PredSU);
+ scheduleDAG->RemovePred(SuccSU, Edge);
+ scheduleDAG->AddPred(&SU, Edge);
+ Edge.setSUnit(&SU);
+ scheduleDAG->AddPred(SuccSU, Edge);
+ --i;
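+ // The removal above changed which edge sits at this index of
+ // PredSU->Succs, so step back and revisit it.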
+ }
+ }
+ outer_loop_continue:;
+ }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+ for (SUnit &SU : *SUnits) {
+ if (!SU.isTwoAddress)
+ continue;
+
+ SDNode *Node = SU.getNode();
+ if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
+ continue;
+
+ bool isLiveOut = hasOnlyLiveOutUses(&SU);
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
+ for (unsigned j = 0; j != NumOps; ++j) {
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
+ continue;
+ SDNode *DU = SU.getNode()->getOperand(j).getNode();
+ if (DU->getNodeId() == -1)
+ continue;
+ const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+ if (!DUSU)
+ continue;
+ for (const SDep &Succ : DUSU->Succs) {
+ if (Succ.isCtrl())
+ continue;
+ SUnit *SuccSU = Succ.getSUnit();
+ if (SuccSU == &SU)
+ continue;
+ // Be conservative. Ignore if nodes aren't at roughly the same
+ // depth and height.
+ if (SuccSU->getHeight() < SU.getHeight() &&
+ (SU.getHeight() - SuccSU->getHeight()) > 1)
+ continue;
+ // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+ // constrains whatever is using the copy, instead of the copy
+ // itself. In the case that the copy is coalesced, this
+ // preserves the intent of the pseudo two-address heuristics.
+ while (SuccSU->Succs.size() == 1 &&
+ SuccSU->getNode()->isMachineOpcode() &&
+ SuccSU->getNode()->getMachineOpcode() ==
+ TargetOpcode::COPY_TO_REGCLASS)
+ SuccSU = SuccSU->Succs.front().getSUnit();
+ // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
+ continue;
+ // Don't constrain nodes with physical register defs if the
+ // predecessor can clobber them.
+ if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
+ if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
+ continue;
+ }
+ // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
+ // these may be coalesced away. We want them close to their uses.
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
+ if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
+ SuccOpc == TargetOpcode::INSERT_SUBREG ||
+ SuccOpc == TargetOpcode::SUBREG_TO_REG)
+ continue;
+ if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
+ (!canClobber(SuccSU, DUSU) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
+ (!SU.isCommutable && SuccSU->isCommutable)) &&
+ !scheduleDAG->IsReachable(SuccSU, &SU)) {
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
+ << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
+ scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
+ }
+ }
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+llvm::ScheduleDAGSDNodes *
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
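+ // Plain bottom-up register reduction: no register pressure tracking and no
+ // source-order preference, so no TargetLowering is needed.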
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
+
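+ // Hybrid scheduling: track register pressure (hence the TargetLowering) and
+ // fall back to latency-oriented heuristics when pressure is low.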
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetLowering *TLI = IS->TLI;
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 000000000000..3be622f8c179
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,913 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAGSDNodes class, a ScheduleDAG subclass used
+// by the SelectionDAG-based scheduling implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+ "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+ cl::desc("Roughly estimate the number of cycles that 'long latency'"
+ " instructions take for targets with no itinerary"));
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+ : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
+ InstrItins(mf.getSubtarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
+ DAG = dag;
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
+}
+
+/// newSUnit - Creates a new SUnit and returns a pointer to it.
+///
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = nullptr;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.emplace_back(N, (unsigned)SUnits.size());
+ assert((Addr == nullptr || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+ SUnit *SU = newSUnit(Old->getNode());
+ SU->OrigNode = Old->OrigNode;
+ SU->Latency = Old->Latency;
+ SU->isVRegCycle = Old->isVRegCycle;
+ SU->isCall = Old->isCall;
+ SU->isCallOp = Old->isCallOp;
+ SU->isTwoAddress = Old->isTwoAddress;
+ SU->isCommutable = Old->isCommutable;
+ SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+ SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->isScheduleHigh = Old->isScheduleHigh;
+ SU->isScheduleLow = Old->isScheduleLow;
+ SU->SchedulingPref = Old->SchedulingPref;
+ Old->isCloned = true;
+ return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ unsigned &PhysReg, int &Cost) {
+ if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+ return;
+
+ unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return;
+
+ unsigned ResNo = User->getOperand(2).getResNo();
+ if (Def->getOpcode() == ISD::CopyFromReg &&
+ cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) {
+ PhysReg = Reg;
+ } else if (Def->isMachineOpcode()) {
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (ResNo >= II.getNumDefs() &&
+ II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+ PhysReg = Reg;
+ }
+
+ if (PhysReg != 0) {
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo));
+ Cost = RC->getCopyCost();
+ }
+}
+
+// Helper for AddGlue to clone node operands.
+static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs,
+ SDValue ExtraOper = SDValue()) {
+ SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end());
+ if (ExtraOper.getNode())
+ Ops.push_back(ExtraOper);
+
+ SDVTList VTList = DAG->getVTList(VTs);
+ MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
+}
+
+static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+ SDNode *GlueDestNode = Glue.getNode();
+
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return false;
+
+ // Don't add a glue operand to something that already uses glue.
+ if (GlueDestNode &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ return false;
+ }
+ // Don't add glue to something that already has a glue value.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
+
+ SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
+
+ CloneNodeWithValues(N, DAG, VTs, Glue);
+
+ return true;
+}
+
+// Clean up after unsuccessful AddGlue. Use the standard method of morphing the
+// node even though simply shrinking the value list is sufficient.
+static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
+ assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue &&
+ !N->hasAnyUseOfValue(N->getNumValues() - 1)) &&
+ "expected an unused glue value");
+
+ CloneNodeWithValues(N, DAG,
+ makeArrayRef(N->value_begin(), N->getNumValues() - 1));
+}
+
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads with the same base pointer but different offsets.
+/// If the offsets are not far apart (target specific), it adds MVT::Glue inputs
+/// and outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = nullptr;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
+ SmallPtrSet<SDNode*, 16> Visited;
+ SmallVector<int64_t, 4> Offsets;
+ DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
+ bool Cluster = false;
+ SDNode *Base = Node;
+ // This algorithm requires a reasonably low use count before finding a match
+ // to avoid uselessly blowing up compile time in large blocks.
+ unsigned UseCount = 0;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E && UseCount < 100; ++I, ++UseCount) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User).second)
+ continue;
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if their addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
+ continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ // Reset UseCount to allow more matches.
+ UseCount = 0;
+ }
+
+ if (!Cluster)
+ return;
+
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
+
+ if (NumLoads == 0)
+ return;
+
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
+ // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ SDValue InGlue = SDValue(nullptr, 0);
+ if (AddGlue(Lead, InGlue, true, DAG))
+ InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutGlue = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ // If AddGlue fails, we could leave an unused glue value. This should not
+ // cause any problems; the dangling glue is cleaned up below via RemoveUnusedGlue.
+ if (AddGlue(Load, InGlue, OutGlue, DAG)) {
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+ else if (!OutGlue && InGlue.getNode())
+ RemoveUnusedGlue(InGlue.getNode(), DAG);
+ }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SDNode &NI : DAG->allnodes()) {
+ SDNode *Node = &NI;
+ if (!Node || !Node->isMachineOpcode())
+ continue;
+
+ unsigned Opc = Node->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
+ }
+}
+
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+ // During scheduling, the NodeId field of SDNode is used to map SDNodes
+ // to their associated SUnits by holding SUnits table indices. A value
+ // of -1 means the SDNode does not yet have an associated SUnit.
+ unsigned NumNodes = 0;
+ for (SDNode &NI : DAG->allnodes()) {
+ NI.setNodeId(-1);
+ ++NumNodes;
+ }
+
+ // Reserve entries in the vector for each of the SUnits we are creating. This
+ // ensures that reallocation of the vector won't happen, so SUnit*'s won't get
+ // invalidated.
+ // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+ // This is a temporary workaround.
+ SUnits.reserve(NumNodes * 2);
+
+ // Add all nodes in depth first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 32> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ SmallVector<SUnit*, 8> CallSUnits;
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (const SDValue &Op : NI->op_values())
+ if (Visited.insert(Op.getNode()).second)
+ Worklist.push_back(Op.getNode());
+
+ if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
+ continue;
+
+ // If this node has already been processed, stop now.
+ if (NI->getNodeId() != -1) continue;
+
+ SUnit *NodeSUnit = newSUnit(NI);
+
+ // See if anything is glued to this node; if so, add those nodes to the glued
+ // group. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
+ SDNode *N = NI;
+ while (N->getNumOperands() &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+ N = N->getOperand(N->getNumOperands()-1).getNode();
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ }
+
+ // Scan down to find any glued succs.
+ N = NI;
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ UI != E; ++UI)
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+ N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
+ break;
+ }
+ if (!HasGlueUse) break;
+ }
+
+ if (NodeSUnit->isCall)
+ CallSUnits.push_back(NodeSUnit);
+
+ // Schedule zero-latency TokenFactor below any nodes that may increase the
+ // schedule height. Otherwise, ancestors of the TokenFactor may appear to
+ // have false stalls.
+ if (NI->getOpcode() == ISD::TokenFactor)
+ NodeSUnit->isScheduleLow = true;
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
+ // Update the SUnit.
+ NodeSUnit->setNode(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NodeSUnit->NodeNum);
+
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
+ // Assign the Latency field of NodeSUnit using target-provided information.
+ computeLatency(NodeSUnit);
+ }
+
+ // Find all call operands.
+ while (!CallSUnits.empty()) {
+ SUnit *SU = CallSUnits.pop_back_val();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->getOpcode() != ISD::CopyToReg)
+ continue;
+ SDNode *SrcN = SUNode->getOperand(2).getNode();
+ if (isPassiveNode(SrcN)) continue; // Not scheduled.
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+ SrcSU->isCallOp = true;
+ }
+ }
+}
+
+void ScheduleDAGSDNodes::AddSchedEdges() {
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+
+ // Check to see if the scheduler cares about latencies.
+ bool UnitLatencies = forceUnitLatencies();
+
+ // Pass 2: add the preds, succs, etc.
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+ SUnit *SU = &SUnits[su];
+ SDNode *MainNode = SU->getNode();
+
+ if (MainNode->isMachineOpcode()) {
+ unsigned Opc = MainNode->getMachineOpcode();
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ SU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ SU->isCommutable = true;
+ }
+
+ // Find all predecessors and successors of the group.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode() &&
+ TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+ SU->hasPhysRegClobbers = true;
+ unsigned NumUsed = InstrEmitter::CountResults(N);
+ while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+ --NumUsed; // Skip over unused values at the end.
+ if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+ SU->hasPhysRegDefs = true;
+ }
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDNode *OpN = N->getOperand(i).getNode();
+ if (isPassiveNode(OpN)) continue; // Not scheduled.
+ SUnit *OpSU = &SUnits[OpN->getNodeId()];
+ assert(OpSU && "Node has no SUnit!");
+ if (OpSU == SU) continue; // In the same group.
+
+ EVT OpVT = N->getOperand(i).getValueType();
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+ bool isChain = OpVT == MVT::Other;
+
+ unsigned PhysReg = 0;
+ int Cost = 1;
+ // Determine if this is a physical register dependency.
+ CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+ assert((PhysReg == 0 || !isChain) &&
+ "Chain dependence via physreg data?");
+ // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+ // emits a copy from the physical register to a virtual register unless
+ // it requires a cross class copy (cost < 0). That means we are only
+ // treating "expensive to copy" register dependency as physical register
+ // dependency. This may change in the future though.
+ if (Cost >= 0 && !StressSched)
+ PhysReg = 0;
+
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ // Special-case TokenFactor chains as zero-latency.
+ if (isChain && OpN->getOpcode() == ISD::TokenFactor)
+ OpLatency = 0;
+
+ SDep Dep = isChain ? SDep(OpSU, SDep::Barrier)
+ : SDep(OpSU, SDep::Data, PhysReg);
+ Dep.setLatency(OpLatency);
+ if (!isChain && !UnitLatencies) {
+ computeOperandLatency(OpN, N, i, Dep);
+ ST.adjustSchedDependency(OpSU, SU, Dep);
+ }
+
+ if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ //
+ // We can't tell (without more book-keeping) if this results from
+ // glued nodes or duplicate operands. As long as we don't reduce
+ // NumRegDefsLeft to zero, we handle the common cases well.
+ --OpSU->NumRegDefsLeft;
+ }
+ }
+ }
+ }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the SelectionDAG we are given
+/// as input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
+ // Populate the SUnits array.
+ BuildSchedUnits();
+ // Compute all the scheduling dependencies between nodes.
+ AddSchedEdges();
+}
+
+// Initialize NodeNumDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ // Check for phys reg copy.
+ if (!Node)
+ return;
+
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ if (POpc == TargetOpcode::PATCHPOINT &&
+ Node->getValueType(0) == MVT::Other) {
+ // PATCHPOINT is defined to have one result, but it might really have none
+ // if we're not using CallingConv::AnyReg. Don't mistake the chain for a
+ // real definition.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ ValueType = Node->getSimpleValueType(DefIdx);
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (!Node) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
+ SDNode *N = SU->getNode();
+
+ // TokenFactor operands are considered zero latency, and some schedulers
+ // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
+ // whenever node latency is nonzero.
+ if (N && N->getOpcode() == ISD::TokenFactor) {
+ SU->Latency = 0;
+ return;
+ }
+
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
+ if (!InstrItins || InstrItins->isEmpty()) {
+ if (N && N->isMachineOpcode() &&
+ TII->isHighLatencyDef(N->getMachineOpcode()))
+ SU->Latency = HighLatencyCycles;
+ else
+ SU->Latency = 1;
+ return;
+ }
+
+ // Compute the latency for the node. We use the sum of the latencies for
+ // all nodes glued together into this SUnit.
+ SU->Latency = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const{
+ // Check to see if the scheduler cares about latencies.
+ if (forceUnitLatencies())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so as not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
+ }
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (!SU->getNode()) {
+ dbgs() << "PHYS REG COPY\n";
+ return;
+ }
+
+ SU->getNode()->dump(DAG);
+ dbgs() << "\n";
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ dbgs() << " ";
+ GluedNodes.back()->dump(DAG);
+ dbgs() << "\n";
+ GluedNodes.pop_back();
+ }
+#endif
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+#endif
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void
+ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+ SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
+ DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) {
+ if (!N->getHasDebugValue())
+ return;
+
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+ // order number right after the N.
+ MachineBasicBlock *BB = Emitter.getBlock();
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (!Order || DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ if (DbgMI) {
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ }
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void
+ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = N->getIROrder();
+ if (!Order || !Seen.insert(Order).second) {
+ // Process any valid SDDbgValues even if the node does not have an order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+ // Fast-isel may have inserted some instructions, in which case the
+ // BB->back().isPHI() test will not fire when we want it to.
+ std::prev(Emitter.getInsertPos())->isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
+
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+ DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
+
+ // If this is the first BB, emit byval parameter dbg_value's.
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+ SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+ SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+ for (; PDI != PDE; ++PDI) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*PDI, VRBaseMap);
+ if (DbgMI)
+ BB->insert(InsertPos, DbgMI);
+ }
+ }
+
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ SUnit *SU = Sequence[i];
+ if (!SU) {
+ // Null SUnit* is a noop.
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
+ continue;
+ }
+
+ // For pre-regalloc scheduling, create instructions corresponding to the
+ // SDNode and any glued SDNodes and append them to the block.
+ if (!SU->getNode()) {
+ // Emit a copy.
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
+ continue;
+ }
+
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ GluedNodes.pop_back();
+ }
+ Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+ VRBaseMap);
+ // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
+ Seen);
+ }
+
+ // Insert all the dbg_values which have not already been inserted in source
+ // order sequence.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+ // Sort the source order instructions and use the order to insert debug
+ // values.
+ std::sort(Orders.begin(), Orders.end(), less_first());
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+ // Insert all SDDbgValue's whose order(s) are before "Order".
+ if (!MI)
+ continue;
+ for (; DI != DE &&
+ (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
+ if (DbgMI) {
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
+ MachineBasicBlock::iterator Pos = MI;
+ MI->getParent()->insert(Pos, DbgMI);
+ }
+ }
+ }
+ LastOrder = Order;
+ }
+ // Add trailing DbgValue's before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
+ while (DI != DE) {
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
+ ++DI;
+ }
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
+ }
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
+/// Return a name for the DAG, derived from the owning basic block's label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
+}
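
The clustering path above (ClusterNeighboringLoads) hinges on collecting each load's offset from a shared base pointer, sorting the offsets, and accepting neighbours until the target hook shouldScheduleLoadsNear declines. A rough standalone sketch of that grouping step, with a fixed distance window standing in for the target hook (all names here are illustrative, not the LLVM API):

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

// Group offsets measured from one base pointer: starting from the smallest offset,
// keep accepting loads while they stay within MaxDist of the first one.
std::vector<int64_t> clusterOffsets(const std::map<int64_t, int> &OffsetToLoad,
                                    int64_t MaxDist) {
  std::vector<int64_t> Offsets;
  for (const auto &KV : OffsetToLoad)
    Offsets.push_back(KV.first);            // std::map already yields sorted keys
  std::vector<int64_t> Cluster;
  if (Offsets.empty())
    return Cluster;
  int64_t BaseOff = Offsets.front();
  for (int64_t Off : Offsets) {
    if (Off - BaseOff > MaxDist)
      break;                                // too far away: stop, as the real code does
    Cluster.push_back(Off);
  }
  return Cluster;
}

int main() {
  std::map<int64_t, int> Loads = {{0, 1}, {8, 2}, {16, 3}, {512, 4}};
  for (int64_t Off : clusterOffsets(Loads, 64))
    std::cout << "cluster load at offset " << Off << "\n"; // prints 0, 8, 16
}

In the real code the accepted loads are then glued together so the scheduler emits them back to back; the sketch stops at the grouping decision.
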
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
new file mode 100644
index 000000000000..5cc806668b12
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -0,0 +1,180 @@
+//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGSDNodes class, which implements
+// scheduling for an SDNode-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+namespace llvm {
+ /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
+ ///
+ /// Edges between SUnits are initially based on edges in the SelectionDAG,
+ /// and additional edges can be added by the schedulers as heuristics.
+ /// SDNodes such as Constants, Registers, and a few others that are not
+ /// interesting to schedulers are not allocated SUnits.
+ ///
+ /// SDNodes with MVT::Glue operands are grouped along with the flagged
+ /// nodes into a single SUnit so that they are scheduled together.
+ ///
+ /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
+ /// edges. Physical register dependence information is not carried in
+ /// the DAG and must be handled explicitly by schedulers.
+ ///
+ class ScheduleDAGSDNodes : public ScheduleDAG {
+ public:
+ MachineBasicBlock *BB;
+ SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
+ explicit ScheduleDAGSDNodes(MachineFunction &mf);
+
+ ~ScheduleDAGSDNodes() override {}
+
+ /// Run - perform scheduling.
+ ///
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
+
+ /// isPassiveNode - Return true if the node is a non-scheduled leaf.
+ ///
+ static bool isPassiveNode(SDNode *Node) {
+ if (isa<ConstantSDNode>(Node)) return true;
+ if (isa<ConstantFPSDNode>(Node)) return true;
+ if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
+ if (isa<GlobalAddressSDNode>(Node)) return true;
+ if (isa<BasicBlockSDNode>(Node)) return true;
+ if (isa<FrameIndexSDNode>(Node)) return true;
+ if (isa<ConstantPoolSDNode>(Node)) return true;
+ if (isa<TargetIndexSDNode>(Node)) return true;
+ if (isa<JumpTableSDNode>(Node)) return true;
+ if (isa<ExternalSymbolSDNode>(Node)) return true;
+ if (isa<MCSymbolSDNode>(Node)) return true;
+ if (isa<BlockAddressSDNode>(Node)) return true;
+ if (Node->getOpcode() == ISD::EntryToken ||
+ isa<MDNodeSDNode>(Node)) return true;
+ return false;
+ }
+
+ /// newSUnit - Creates a new SUnit and returns a pointer to it.
+ ///
+ SUnit *newSUnit(SDNode *N);
+
+ /// Clone - Creates a clone of the specified SUnit. It does not copy the
+ /// predecessors / successors info nor the temporary scheduling states.
+ ///
+ SUnit *Clone(SUnit *N);
+
+ /// BuildSchedGraph - Build the SUnit graph from the SelectionDAG we are given
+ /// as input. This SUnit graph is similar to the SelectionDAG, but
+ /// excludes nodes that aren't interesting to scheduling, and represents
+ /// glued-together nodes with a single SUnit.
+ void BuildSchedGraph(AliasAnalysis *AA);
+
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
+ /// computeLatency - Compute node latency.
+ ///
+ virtual void computeLatency(SUnit *SU);
+
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
+ /// Schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ virtual void Schedule() = 0;
+
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+ void dumpNode(const SUnit *SU) const override;
+
+ void dumpSchedule() const;
+
+ std::string getGraphNodeLabel(const SUnit *SU) const override;
+
+ std::string getDAGName() const override;
+
+ virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ MVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != nullptr; }
+
+ MVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
+ protected:
+ /// ForceUnitLatencies - Return true if all scheduling edges should be given
+ /// a latency value of one. The default is to return false; schedulers may
+ /// override this as needed.
+ virtual bool forceUnitLatencies() const { return false; }
+
+ private:
+ /// ClusterNeighboringLoads - Cluster loads from "near" addresses into
+ /// combined SUnits.
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
+
+ /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
+ void BuildSchedUnits();
+ void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
+ };
+}
+
+#endif
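
RegDefIter above is a small hand-rolled, self-advancing iterator: it caches how many values the current node defines, skips values with no uses, and hops to the next glued node once the current one is exhausted. A standalone sketch of that shape over a simple glued chain (the struct and class below are illustrative stand-ins, not the LLVM types):

#include <iostream>
#include <vector>

struct Node {
  std::vector<bool> ValueUsed; // which defined values actually have uses
  Node *Glued = nullptr;       // next node glued to this one, if any
};

class DefIter {
  const Node *N;
  unsigned Idx = 0;
public:
  explicit DefIter(const Node *Start) : N(Start) { advance(); }
  bool valid() const { return N != nullptr; }
  unsigned index() const { return Idx - 1; } // mirrors RegDefIter::GetIdx()
  const Node *node() const { return N; }
  void advance() {
    while (N) {
      for (; Idx < N->ValueUsed.size(); ++Idx) {
        if (!N->ValueUsed[Idx])
          continue;            // skip values with no uses, like the real iterator
        ++Idx;
        return;                // stop on a used value
      }
      N = N->Glued;            // exhausted this node; move to the glued one
      Idx = 0;
    }
  }
};

int main() {
  Node B{{false, true}, nullptr};
  Node A{{true, false}, &B};   // A defines one used value, B's second value is used
  for (DefIter I(&A); I.valid(); I.advance())
    std::cout << "def #" << I.index() << " on node "
              << (I.node() == &A ? "A" : "B") << "\n";
}
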
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 000000000000..eee4a4b06718
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,279 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+STATISTIC(NumNoops , "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+public:
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+ const TargetSubtargetInfo &STI = mf.getSubtarget();
+ HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
+ }
+
+ ~ScheduleDAGVLIW() override {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule() override;
+
+private:
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+ assert(!D.isWeak() && "unexpected artificial DAG edge");
+
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ PendingQueue.push_back(SuccSU);
+ }
+}
+
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ releaseSucc(SU, *I);
+ }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->scheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+ // Add all leaves to the AvailableQueue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While AvailableQueue is not empty, grab the node with the highest
+ // priority. If it is not ready, put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(nullptr);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = nullptr;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall, but no other problem, just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(nullptr); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
+}
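
The scheduling loop in ScheduleDAGVLIW is a cycle-driven list scheduler: units wait in a pending set until their earliest legal cycle arrives, then move to an available queue and are issued by priority, with the cycle counter advancing on stalls. A compact standalone sketch of that control flow, with no hazard recognizer and "earliest ready cycle" standing in for the real priority function (names are illustrative):

#include <iostream>
#include <queue>
#include <vector>

struct Unit {
  int Id;
  unsigned ReadyCycle; // earliest cycle this unit may issue
};

int main() {
  std::vector<Unit> Pending = {{0, 0}, {1, 0}, {2, 2}, {3, 5}};
  auto ByReadyCycle = [](const Unit &A, const Unit &B) {
    return A.ReadyCycle > B.ReadyCycle; // min-heap on ready cycle
  };
  std::priority_queue<Unit, std::vector<Unit>, decltype(ByReadyCycle)>
      Available(ByReadyCycle);

  unsigned CurCycle = 0;
  std::vector<int> Sequence;
  while (!Pending.empty() || !Available.empty()) {
    // Move anything whose ready cycle has arrived into the available queue.
    for (unsigned i = 0; i < Pending.size();) {
      if (Pending[i].ReadyCycle <= CurCycle) {
        Available.push(Pending[i]);
        Pending[i] = Pending.back();
        Pending.pop_back();
      } else {
        ++i;
      }
    }
    if (Available.empty()) {
      ++CurCycle;                            // nothing can issue: stall and advance
      continue;
    }
    Sequence.push_back(Available.top().Id);  // issue the best ready unit
    Available.pop();
    ++CurCycle;
  }
  for (int Id : Sequence)
    std::cout << "issued SU#" << Id << "\n";
}

The real scheduler additionally consults a hazard recognizer and may emit noops on hazards; the sketch only shows the queue bookkeeping.
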
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
new file mode 100644
index 000000000000..8235522b14bd
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -0,0 +1,7306 @@
+//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+using namespace llvm;
+
+/// makeVTList - Return an instance of the SDVTList struct initialized with the
+/// specified members.
+static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
+ SDVTList Res = {VTs, NumVTs};
+ return Res;
+}
+
+// Default null implementations of the callbacks.
+void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+
+//===----------------------------------------------------------------------===//
+// ConstantFPSDNode Class
+//===----------------------------------------------------------------------===//
+
+/// isExactlyValue - We don't rely on operator== working on double values, as
+/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
+/// As such, this method can be used to do an exact bit-for-bit comparison of
+/// two floating point values.
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
+ return getValueAPF().bitwiseIsEqual(V);
+}
+
+bool ConstantFPSDNode::isValueValidForType(EVT VT,
+ const APFloat& Val) {
+ assert(VT.isFloatingPoint() && "Can only convert between FP types");
+
+ // convert modifies in place, so make a copy.
+ APFloat Val2 = APFloat(Val);
+ bool losesInfo;
+ (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &losesInfo);
+ return !losesInfo;
+}
+
+//===----------------------------------------------------------------------===//
+// ISD Namespace
+//===----------------------------------------------------------------------===//
+
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+ auto *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasUndefs;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) &&
+ EltVT.getSizeInBits() >= SplatBitSize;
+}
+
+// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
+// specializations of the more general isConstantSplatVector()?
+
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ unsigned i = 0, e = N->getNumOperands();
+
+ // Skip over all of the undef values.
+ while (i != e && N->getOperand(i).isUndef())
+ ++i;
+
+ // Do not accept an all-undef vector.
+ if (i == e) return false;
+
+ // Do not accept build_vectors that aren't all constants or which have non-~0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
+ SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
+ if (CN->getAPIntValue().countTrailingOnes() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
+ return false;
+ } else
+ return false;
+
+ // Okay, we have at least one ~0 value, check to see if the rest match or are
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
+ for (++i; i != e; ++i)
+ if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
+ return false;
+ return true;
+}
+
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ // Look through a bit convert.
+ while (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
+
+ bool IsAllUndef = true;
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ IsAllUndef = false;
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target
+ // and a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all zeros, not whether the individual
+ // constants are.
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
+ if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+ return false;
+ } else
+ return false;
+ }
+
+ // Do not accept an all-undef vector.
+ if (IsAllUndef)
+ return false;
+ return true;
+}
+
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
+bool ISD::allOperandsUndef(const SDNode *N) {
+ // Return false if the node has no operands.
+ // This is "logically inconsistent" with the definition of "all" but
+ // is probably the desired behavior.
+ if (N->getNumOperands() == 0)
+ return false;
+
+ for (const SDValue &Op : N->op_values())
+ if (!Op.isUndef())
+ return false;
+
+ return true;
+}
+
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
+ case ISD::SEXTLOAD:
+ return ISD::SIGN_EXTEND;
+ case ISD::ZEXTLOAD:
+ return ISD::ZERO_EXTEND;
+ default:
+ break;
+ }
+
+ llvm_unreachable("Invalid LoadExtType");
+}
+
+ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
+ // To perform this operation, we just need to swap the L and G bits of the
+ // operation.
+ unsigned OldL = (Operation >> 2) & 1;
+ unsigned OldG = (Operation >> 1) & 1;
+ return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits
+ (OldL << 1) | // New G bit
+ (OldG << 2)); // New L bit.
+}
+
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+ unsigned Operation = Op;
+ if (isInteger)
+ Operation ^= 7; // Flip L, G, E bits, but not U.
+ else
+ Operation ^= 15; // Flip all of the condition bits.
+
+ if (Operation > ISD::SETTRUE2)
+ Operation &= ~8; // Don't let N and U bits get set.
+
+ return ISD::CondCode(Operation);
+}
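+// Illustration of the bit trick used by the two helpers above, assuming the CondCode
+// encoding in ISDOpcodes.h (E = bit 0, G = bit 1, L = bit 2, U = bit 3): swapping the
+// operands of SETULT (U|L) exchanges the L and G bits and yields SETUGT (U|G), while
+// inverting the integer code SETLT flips the L, G and E bits and yields SETGE.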
+
+
+/// For an integer comparison, return 1 if the comparison is a signed operation
+/// and 2 if it is an unsigned comparison. Return zero if the operation
+/// does not depend on the sign of the input (setne and seteq).
+static int isSignedOp(ISD::CondCode Opcode) {
+ switch (Opcode) {
+ default: llvm_unreachable("Illegal integer setcc operation!");
+ case ISD::SETEQ:
+ case ISD::SETNE: return 0;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return 1;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return 2;
+ }
+}
+
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed integer setcc with an unsigned integer setcc.
+ return ISD::SETCC_INVALID;
+
+ unsigned Op = Op1 | Op2; // Combine all of the condition bits.
+
+ // If the N and U bits get set then the resultant comparison DOES suddenly
+ // care about orderedness, and is true when ordered.
+ if (Op > ISD::SETTRUE2)
+ Op &= ~16; // Clear the U bit if the N bit is set.
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ Op = ISD::SETNE;
+
+ return ISD::CondCode(Op);
+}
+
+ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
+ bool isInteger) {
+ if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ // Cannot fold a signed setcc with an unsigned setcc.
+ return ISD::SETCC_INVALID;
+
+ // Combine all of the condition bits.
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
+
+ // Canonicalize illegal integer setcc's.
+ if (isInteger) {
+ switch (Result) {
+ default: break;
+ case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
+ case ISD::SETOEQ: // SETEQ & SETU[LG]E
+ case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE
+ case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE
+ case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE
+ }
+ }
+
+ return Result;
+}
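+
+// Illustrative sketch of how the two combiners behave for integer predicates
+// (worked examples only):
+//
+//   // (x > y) || (x < y)  simplifies to  x != y
+//   ISD::getSetCCOrOperation(ISD::SETGT, ISD::SETLT, /*isInteger=*/true)
+//       == ISD::SETNE;
+//   // (x <= y) && (x >= y)  simplifies to  x == y
+//   ISD::getSetCCAndOperation(ISD::SETLE, ISD::SETGE, /*isInteger=*/true)
+//       == ISD::SETEQ;
+//   // Signed and unsigned predicates cannot be folded together:
+//   ISD::getSetCCAndOperation(ISD::SETLT, ISD::SETULT, /*isInteger=*/true)
+//       == ISD::SETCC_INVALID;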
+
+//===----------------------------------------------------------------------===//
+// SDNode Profile Support
+//===----------------------------------------------------------------------===//
+
+/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
+///
+static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
+ ID.AddInteger(OpC);
+}
+
+/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
+/// solely with their pointer.
+static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
+ ID.AddPointer(VTList.VTs);
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ ArrayRef<SDValue> Ops) {
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+}
+
+/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
+///
+static void AddNodeIDOperands(FoldingSetNodeID &ID,
+ ArrayRef<SDUse> Ops) {
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+}
+
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
+ SDVTList VTList, ArrayRef<SDValue> OpList) {
+ AddNodeIDOpcode(ID, OpC);
+ AddNodeIDValueTypes(ID, VTList);
+ AddNodeIDOperands(ID, OpList);
+}
+
+/// If this is an SDNode with special info, add this info to the NodeID data.
+static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::TargetExternalSymbol:
+ case ISD::ExternalSymbol:
+ case ISD::MCSymbol:
+ llvm_unreachable("Should only be used on nodes with operands");
+ default: break; // Normal nodes don't need extra info.
+ case ISD::TargetConstant:
+ case ISD::Constant: {
+ const ConstantSDNode *C = cast<ConstantSDNode>(N);
+ ID.AddPointer(C->getConstantIntValue());
+ ID.AddBoolean(C->isOpaque());
+ break;
+ }
+ case ISD::TargetConstantFP:
+ case ISD::ConstantFP: {
+ ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+ break;
+ }
+ case ISD::TargetGlobalAddress:
+ case ISD::GlobalAddress:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::GlobalTLSAddress: {
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ ID.AddPointer(GA->getGlobal());
+ ID.AddInteger(GA->getOffset());
+ ID.AddInteger(GA->getTargetFlags());
+ ID.AddInteger(GA->getAddressSpace());
+ break;
+ }
+ case ISD::BasicBlock:
+ ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+ break;
+ case ISD::Register:
+ ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+ break;
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
+ case ISD::SRCVALUE:
+ ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+ break;
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+ break;
+ case ISD::JumpTable:
+ case ISD::TargetJumpTable:
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+ ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+ break;
+ case ISD::ConstantPool:
+ case ISD::TargetConstantPool: {
+ const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+ ID.AddInteger(CP->getAlignment());
+ ID.AddInteger(CP->getOffset());
+ if (CP->isMachineConstantPoolEntry())
+ CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
+ else
+ ID.AddPointer(CP->getConstVal());
+ ID.AddInteger(CP->getTargetFlags());
+ break;
+ }
+ case ISD::TargetIndex: {
+ const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
+ ID.AddInteger(TI->getIndex());
+ ID.AddInteger(TI->getOffset());
+ ID.AddInteger(TI->getTargetFlags());
+ break;
+ }
+ case ISD::LOAD: {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::STORE: {
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_STORE: {
+ const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+ ID.AddInteger(AT->getMemoryVT().getRawBits());
+ ID.AddInteger(AT->getRawSubclassData());
+ ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::PREFETCH: {
+ const MemSDNode *PF = cast<MemSDNode>(N);
+ ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+ i != e; ++i)
+ ID.AddInteger(SVN->getMaskElt(i));
+ break;
+ }
+ case ISD::TargetBlockAddress:
+ case ISD::BlockAddress: {
+ const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
+ ID.AddPointer(BA->getBlockAddress());
+ ID.AddInteger(BA->getOffset());
+ ID.AddInteger(BA->getTargetFlags());
+ break;
+ }
+ } // end switch (N->getOpcode())
+
+ // Target specific memory nodes could also have address spaces to check.
+ if (N->isTargetMemoryOpcode())
+ ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+ AddNodeIDOpcode(ID, N->getOpcode());
+ // Add the return value info.
+ AddNodeIDValueTypes(ID, N->getVTList());
+ // Add the operand info.
+ AddNodeIDOperands(ID, N->ops());
+
+  // Handle SDNode leaves with special info.
+ AddNodeIDCustom(ID, N);
+}
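+
+// Illustrative sketch of the CSE idiom this profiling support enables; the
+// same pattern appears in most of the node factories below (the newSDNode<>
+// argument list is elided here):
+//
+//   FoldingSetNodeID ID;
+//   AddNodeIDNode(ID, Opc, getVTList(VT), Ops);
+//   void *IP = nullptr;
+//   if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+//     return SDValue(E, 0);         // Reuse the structurally identical node.
+//   auto *N = newSDNode<...>(...);  // Otherwise allocate a new one ...
+//   CSEMap.InsertNode(N, IP);       // ... and remember it for later lookups.
+//   InsertNode(N);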
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, the non-temporal hint, indexing mode,
+/// extension/truncation information.
+///
+static inline unsigned
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal, bool isInvariant) {
+ assert((ConvType & 3) == ConvType &&
+ "ConvType may not require more than 2 bits!");
+ assert((AM & 7) == AM &&
+ "AM may not require more than 3 bits!");
+ return ConvType |
+ (AM << 2) |
+ (isVolatile << 5) |
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
+}
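+
+// Illustrative sketch of the resulting bit layout, derived from the shifts
+// above: bits 0-1 hold the extension/truncation kind, bits 2-4 the indexed
+// addressing mode, and bits 5, 6 and 7 the volatile, non-temporal and
+// invariant flags respectively. For example, a non-extending, non-indexed,
+// volatile access encodes as:
+//
+//   unsigned Flags = encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+//                                         /*isVolatile=*/true,
+//                                         /*isNonTemporal=*/false,
+//                                         /*isInvariant=*/false);
+//   // Flags == (1u << 5)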
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG Class
+//===----------------------------------------------------------------------===//
+
+/// doNotCSE - Return true if CSE should not be performed for this node.
+static bool doNotCSE(SDNode *N) {
+ if (N->getValueType(0) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::HANDLENODE:
+ case ISD::EH_LABEL:
+ return true; // Never CSE these nodes.
+ }
+
+ // Check that remaining values produced are not flags.
+ for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
+ if (N->getValueType(i) == MVT::Glue)
+ return true; // Never CSE anything that produces a flag.
+
+ return false;
+}
+
+/// RemoveDeadNodes - This method deletes all unreachable nodes in the
+/// SelectionDAG.
+void SelectionDAG::RemoveDeadNodes() {
+  // Create a dummy node (which is not added to allnodes) that adds a reference
+ // to the root node, preventing it from being deleted.
+ HandleSDNode Dummy(getRoot());
+
+ SmallVector<SDNode*, 128> DeadNodes;
+
+ // Add all obviously-dead nodes to the DeadNodes worklist.
+ for (SDNode &Node : allnodes())
+ if (Node.use_empty())
+ DeadNodes.push_back(&Node);
+
+ RemoveDeadNodes(DeadNodes);
+
+  // If the root changed (e.g. it was a dead load), update the root.
+ setRoot(Dummy.getValue());
+}
+
+/// RemoveDeadNodes - This method deletes the unreachable nodes in the
+/// given list, and any nodes that become unreachable as a result.
+void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
+
+ // Process the worklist, deleting the nodes and adding their uses to the
+ // worklist.
+ while (!DeadNodes.empty()) {
+ SDNode *N = DeadNodes.pop_back_val();
+
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, nullptr);
+
+ // Take the node out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Next, brutally remove the operand list. This is safe to do, as there are
+ // no cycles in the graph.
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Operand = Use.getNode();
+ Use.set(SDValue());
+
+      // Now that we removed this operand, see if the operand has become dead.
+ if (Operand->use_empty())
+ DeadNodes.push_back(Operand);
+ }
+
+ DeallocateNode(N);
+ }
+}
+
+void SelectionDAG::RemoveDeadNode(SDNode *N){
+ SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
+ RemoveDeadNodes(DeadNodes);
+}
+
+void SelectionDAG::DeleteNode(SDNode *N) {
+ // First take this out of the appropriate CSE map.
+ RemoveNodeFromCSEMaps(N);
+
+ // Finally, remove uses due to operands of this node, remove from the
+ // AllNodes list, and delete the node.
+ DeleteNodeNotInCSEMaps(N);
+}
+
+void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
+ assert(N->getIterator() != AllNodes.begin() &&
+ "Cannot delete the entry node!");
+ assert(N->use_empty() && "Cannot delete a node that is not dead!");
+
+ // Drop all of the operands and decrement used node's use counts.
+ N->DropOperands();
+
+ DeallocateNode(N);
+}
+
+void SDDbgInfo::erase(const SDNode *Node) {
+ DbgValMapType::iterator I = DbgValMap.find(Node);
+ if (I == DbgValMap.end())
+ return;
+ for (auto &Val: I->second)
+ Val->setIsInvalidated();
+ DbgValMap.erase(I);
+}
+
+void SelectionDAG::DeallocateNode(SDNode *N) {
+ // If we have operands, deallocate them.
+ removeOperands(N);
+
+ // Set the opcode to DELETED_NODE to help catch bugs when node
+ // memory is reallocated.
+ N->NodeType = ISD::DELETED_NODE;
+
+ NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate
+ // them and forget about that node.
+ DbgInfo->erase(N);
+}
+
+#ifndef NDEBUG
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::BUILD_PAIR: {
+ EVT VT = N->getValueType(0);
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
+ "Wrong return type!");
+ assert(N->getNumOperands() == 2 && "Wrong number of operands!");
+ assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
+ "Mismatched operand types!");
+ assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
+ "Wrong operand type!");
+ assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
+ "Wrong return type size");
+ break;
+ }
+ case ISD::BUILD_VECTOR: {
+ assert(N->getNumValues() == 1 && "Too many results!");
+ assert(N->getValueType(0).isVector() && "Wrong return type!");
+ assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
+ "Wrong number of operands!");
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) {
+ assert((I->getValueType() == EltVT ||
+ (EltVT.isInteger() && I->getValueType().isInteger() &&
+ EltVT.bitsLE(I->getValueType()))) &&
+ "Wrong operand type!");
+ assert(I->getValueType() == N->getOperand(0).getValueType() &&
+ "Operands must all have the same type");
+ }
+ break;
+ }
+ }
+}
+#endif // NDEBUG
+
+/// \brief Insert a newly allocated node into the DAG.
+///
+/// Handles insertion into the all nodes list and CSE map, as well as
+/// verification and other common operations when a new node is allocated.
+void SelectionDAG::InsertNode(SDNode *N) {
+ AllNodes.push_back(N);
+#ifndef NDEBUG
+ N->PersistentId = NextPersistentId++;
+ VerifySDNode(N);
+#endif
+}
+
+/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
+/// corresponds to it. This is useful when we're about to delete or repurpose
+/// the node. We don't want future requests for structurally identical nodes
+/// to return N anymore.
+bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
+ bool Erased = false;
+ switch (N->getOpcode()) {
+ case ISD::HANDLENODE: return false; // noop.
+ case ISD::CONDCODE:
+ assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
+ "Cond code doesn't exist!");
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
+ break;
+ case ISD::ExternalSymbol:
+ Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+ break;
+ case ISD::TargetExternalSymbol: {
+ ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+ Erased = TargetExternalSymbols.erase(
+ std::pair<std::string,unsigned char>(ESN->getSymbol(),
+ ESN->getTargetFlags()));
+ break;
+ }
+ case ISD::MCSymbol: {
+ auto *MCSN = cast<MCSymbolSDNode>(N);
+ Erased = MCSymbols.erase(MCSN->getMCSymbol());
+ break;
+ }
+ case ISD::VALUETYPE: {
+ EVT VT = cast<VTSDNode>(N)->getVT();
+ if (VT.isExtended()) {
+ Erased = ExtendedValueTypeNodes.erase(VT);
+ } else {
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
+ }
+ break;
+ }
+ default:
+ // Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
+ Erased = CSEMap.RemoveNode(N);
+ break;
+ }
+#ifndef NDEBUG
+ // Verify that the node was actually in one of the CSE maps, unless it has a
+ // flag result (which cannot be CSE'd) or is one of the special cases that are
+ // not subject to CSE.
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
+ !N->isMachineOpcode() && !doNotCSE(N)) {
+ N->dump(this);
+ dbgs() << "\n";
+ llvm_unreachable("Node is not in map!");
+ }
+#endif
+ return Erased;
+}
+
+/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
+/// maps and modified in place. Add it back to the CSE maps, unless an identical
+/// node already exists, in which case transfer all its users to the existing
+/// node. This transfer can potentially trigger recursive merging.
+///
+void
+SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
+ // For node types that aren't CSE'd, just act as if no identical node
+ // already exists.
+ if (!doNotCSE(N)) {
+ SDNode *Existing = CSEMap.GetOrInsertNode(N);
+ if (Existing != N) {
+ // If there was already an existing matching node, use ReplaceAllUsesWith
+ // to replace the dead one with the existing one. This can cause
+ // recursive merging of other unrelated nodes down the line.
+ ReplaceAllUsesWith(N, Existing);
+
+ // N is now dead. Inform the listeners and delete it.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeDeleted(N, Existing);
+ DeleteNodeNotInCSEMaps(N);
+ return;
+ }
+ }
+
+ // If the node doesn't already exist, we updated it. Inform listeners.
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeUpdated(N);
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ SDValue Ops[] = { Op };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ if (const SDNodeFlags *Flags = N->getFlags())
+ Node->intersectFlagsWith(Flags);
+ return Node;
+}
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
+ SDValue Op1, SDValue Op2,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ SDValue Ops[] = { Op1, Op2 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ if (const SDNodeFlags *Flags = N->getFlags())
+ Node->intersectFlagsWith(Flags);
+ return Node;
+}
+
+
+/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
+/// were replaced with those specified. If this node is never memoized,
+/// return null, otherwise return a pointer to the slot it would take. If a
+/// node already exists with these operands, the slot will be non-null.
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
+ void *&InsertPos) {
+ if (doNotCSE(N))
+ return nullptr;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
+ AddNodeIDCustom(ID, N);
+ SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
+ if (Node)
+ if (const SDNodeFlags *Flags = N->getFlags())
+ Node->intersectFlagsWith(Flags);
+ return Node;
+}
+
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+ Type *Ty = VT == MVT::iPTR ?
+ PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+ VT.getTypeForEVT(*getContext());
+
+ return getDataLayout().getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
+ : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL),
+ EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
+ Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+ UpdateListeners(nullptr) {
+ InsertNode(&EntryNode);
+ DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &mf) {
+ MF = &mf;
+ TLI = getSubtarget().getTargetLowering();
+ TSI = getSubtarget().getSelectionDAGInfo();
+ Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+ assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
+ allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
+ delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+ assert(&*AllNodes.begin() == &EntryNode);
+ AllNodes.remove(AllNodes.begin());
+ while (!AllNodes.empty())
+ DeallocateNode(&AllNodes.front());
+#ifndef NDEBUG
+ NextPersistentId = 0;
+#endif
+}
+
+SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTs, SDValue N1, SDValue N2,
+ const SDNodeFlags *Flags) {
+ SDValue Ops[] = {N1, N2};
+
+ if (isBinOpWithFlags(Opcode)) {
+ // If no flags were passed in, use a default flags object.
+ SDNodeFlags F;
+ if (Flags == nullptr)
+ Flags = &F;
+
+ auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(),
+ DL.getDebugLoc(), VTs, *Flags);
+ createOperands(FN, Ops);
+
+ return FN;
+ }
+
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ return N;
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
+ "debug location. Use another overload.");
+ }
+ }
+ return N;
+}
+
+SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+ const SDLoc &DL, void *&InsertPos) {
+ SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (N) {
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
+ // Erase debug location from the node if the node is used at several
+ // different places. Do not propagate one location to all uses as it
+ // will cause a worse single stepping debugging experience.
+ if (N->getDebugLoc() != DL.getDebugLoc())
+ N->setDebugLoc(DebugLoc());
+ break;
+ default:
+ // When the node's point of use is located earlier in the instruction
+ // sequence than its prior point of use, update its debug info to the
+ // earlier location.
+ if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
+ N->setDebugLoc(DL.getDebugLoc());
+ break;
+ }
+ }
+ return N;
+}
+
+void SelectionDAG::clear() {
+ allnodes_clear();
+ OperandRecycler.clear(OperandAllocator);
+ OperandAllocator.Reset();
+ CSEMap.clear();
+
+ ExtendedValueTypeNodes.clear();
+ ExternalSymbols.clear();
+ TargetExternalSymbols.clear();
+ MCSymbols.clear();
+ std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+ static_cast<CondCodeSDNode*>(nullptr));
+ std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+ static_cast<SDNode*>(nullptr));
+
+ EntryNode.UseList = nullptr;
+ InsertNode(&EntryNode);
+ Root = getEntryNode();
+ DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ANY_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
+ getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
+ EVT OpVT) {
+ if (VT.bitsLE(Op.getValueType()))
+ return getNode(ISD::TRUNCATE, SL, VT, Op);
+
+ TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
+ return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
+ if (Op.getValueType() == VT) return Op;
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ return getNode(ISD::AND, DL, Op.getValueType(), Op,
+ getConstant(Imm, DL, Op.getValueType()));
+}
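+
+// Illustrative usage sketch (DAG, DL and Op are hypothetical names): zero
+// extending "in register" from a narrower type simply masks away the high
+// bits, so for an i32 value this produces (and Op, 0xFF):
+//
+//   SDValue Masked = DAG.getZeroExtendInReg(Op, DL, MVT::i8);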
+
+SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
+ EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+///
+SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue NegOne =
+ getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
+ return getNode(ISD::XOR, DL, VT, Val, NegOne);
+}
+
+SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue TrueValue;
+ switch (TLI->getBooleanContents(VT)) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = getConstant(1, DL, VT);
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
+ VT);
+ break;
+ }
+ return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+}
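+
+// Illustrative usage sketch (DAG, DL and Val are hypothetical names): both
+// helpers lower to an XOR and differ only in the constant used. getNOT always
+// XORs with all-ones, while getLogicalNOT XORs with the target's "true" value
+// (1 or all-ones, depending on its boolean contents).
+//
+//   SDValue BitNot = DAG.getNOT(DL, Val, MVT::i32);        // xor Val, -1
+//   SDValue LogNot = DAG.getLogicalNOT(DL, Val, MVT::i1);  // xor Val, true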
+
+SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
+ EVT EltVT = VT.getScalarType();
+ assert((EltVT.getSizeInBits() >= 64 ||
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ "getConstant with a uint64_t value that doesn't fit in the type!");
+ return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
+}
+
+SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
+ bool isT, bool isO) {
+ return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
+}
+
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
+ EVT VT, bool isT, bool isO) {
+ assert(VT.isInteger() && "Cannot create FP integer constant!");
+
+ EVT EltVT = VT.getScalarType();
+ const ConstantInt *Elt = &Val;
+
+ // In some cases the vector type is legal but the element type is illegal and
+ // needs to be promoted, for example v8i8 on ARM. In this case, promote the
+ // inserted value (the type does not need to match the vector element type).
+ // Any extra bits introduced will be truncated away.
+ if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
+ }
+ // In other cases the element type is illegal and needs to be expanded, for
+ // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+ // the value into n parts and use a vector type with n-times the elements.
+ // Then bitcast to the type requested.
+ // Legalizing constants too early makes the DAGCombiner's job harder so we
+ // only legalize if the DAG tells us we must produce legal types.
+ else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+ TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ const APInt &NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+    // Check that the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+ EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
+ .trunc(ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
+ }
+
+ // EltParts is currently in little endian order. If we actually want
+ // big-endian order then reverse it now.
+ if (getDataLayout().isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
+
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+
+ SDValue Result = getNode(ISD::BITCAST, DL, VT,
+ getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops));
+ return Result;
+ }
+
+ assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
+ "APInt size does not match type size!");
+ unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ ID.AddPointer(Elt);
+ ID.AddBoolean(isO);
+ void *IP = nullptr;
+ SDNode *N = nullptr;
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector())
+ Result = getSplatBuildVector(VT, DL, Result);
+ return Result;
+}
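+
+// Illustrative usage sketch (DAG and DL are hypothetical names): requesting a
+// constant of vector type yields a splat BUILD_VECTOR of the scalar constant
+// node, so both of the following typically share (via CSE) the same
+// underlying ConstantSDNode:
+//
+//   SDValue Zero  = DAG.getConstant(0, DL, MVT::i32);    // scalar constant
+//   SDValue VZero = DAG.getConstant(0, DL, MVT::v4i32);  // splat of four zeros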
+
+SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
+ bool isTarget) {
+ return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
+ bool isTarget) {
+ return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
+}
+
+SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
+ EVT VT, bool isTarget) {
+ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
+
+ EVT EltVT = VT.getScalarType();
+
+ // Do the map lookup using the actual bit pattern for the floating point
+ // value, so that we don't have problems with 0.0 comparing equal to -0.0, and
+ // we don't have issues with SNANs.
+ unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
+ ID.AddPointer(&V);
+ void *IP = nullptr;
+ SDNode *N = nullptr;
+ if ((N = FindNodeOrInsertPos(ID, DL, IP)))
+ if (!VT.isVector())
+ return SDValue(N, 0);
+
+ if (!N) {
+ N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ }
+
+ SDValue Result(N, 0);
+ if (VT.isVector())
+ Result = getSplatBuildVector(VT, DL, Result);
+ return Result;
+}
+
+SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
+ bool isTarget) {
+ EVT EltVT = VT.getScalarType();
+ if (EltVT == MVT::f32)
+ return getConstantFP(APFloat((float)Val), DL, VT, isTarget);
+ else if (EltVT == MVT::f64)
+ return getConstantFP(APFloat(Val), DL, VT, isTarget);
+ else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 ||
+ EltVT == MVT::f16) {
+ bool Ignored;
+ APFloat APF = APFloat(Val);
+ APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &Ignored);
+ return getConstantFP(APF, DL, VT, isTarget);
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
+}
+
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
+ EVT VT, int64_t Offset, bool isTargetGA,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTargetGA) &&
+ "Cannot set target flags on target-independent globals");
+
+ // Truncate (with sign-extension) the offset value to the pointer size.
+ unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ if (BitWidth < 64)
+ Offset = SignExtend64(Offset, BitWidth);
+
+ unsigned Opc;
+ if (GV->isThreadLocal())
+ Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
+ else
+ Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddPointer(GV);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ ID.AddInteger(GV->getType()->getAddressSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<GlobalAddressSDNode>(
+ Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
+ unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(FI);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent jump tables");
+ unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(JTI);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ ID.AddPointer(C);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+
+SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
+ unsigned Alignment, int Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ assert((TargetFlags == 0 || isTarget) &&
+ "Cannot set target flags on target-independent globals");
+ if (Alignment == 0)
+ Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddInteger(Alignment);
+ ID.AddInteger(Offset);
+ C->addSelectionDAGCSEId(ID);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment,
+ TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
+ unsigned char TargetFlags) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
+ ID.AddInteger(Index);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
+ ID.AddPointer(MBB);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<BasicBlockSDNode>(MBB);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getValueType(EVT VT) {
+ if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >=
+ ValueTypeNodes.size())
+ ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1);
+
+ SDNode *&N = VT.isExtended() ?
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy];
+
+ if (N) return SDValue(N, 0);
+ N = newSDNode<VTSDNode>(VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
+ SDNode *&N = ExternalSymbols[Sym];
+ if (N) return SDValue(N, 0);
+ N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
+ SDNode *&N = MCSymbols[Sym];
+ if (N)
+ return SDValue(N, 0);
+ N = newSDNode<MCSymbolSDNode>(Sym, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
+ unsigned char TargetFlags) {
+ SDNode *&N =
+ TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+ TargetFlags)];
+ if (N) return SDValue(N, 0);
+ N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
+ if ((unsigned)Cond >= CondCodeNodes.size())
+ CondCodeNodes.resize(Cond+1);
+
+ if (!CondCodeNodes[Cond]) {
+ auto *N = newSDNode<CondCodeSDNode>(Cond);
+ CondCodeNodes[Cond] = N;
+ InsertNode(N);
+ }
+
+ return SDValue(CondCodeNodes[Cond], 0);
+}
+
+/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
+/// point at N1 to point at N2 and indices that point at N2 to point at N1.
+static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
+ std::swap(N1, N2);
+ ShuffleVectorSDNode::commuteMask(M);
+}
+
+SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
+ SDValue N2, ArrayRef<int> Mask) {
+ assert(VT.getVectorNumElements() == Mask.size() &&
+ "Must have the same number of vector elements as mask elements!");
+ assert(VT == N1.getValueType() && VT == N2.getValueType() &&
+ "Invalid VECTOR_SHUFFLE");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ // Validate that all indices in Mask are within the range of the elements
+ // input to the shuffle.
+ int NElts = Mask.size();
+ assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ "Index out of range");
+
+ // Copy the mask so we can do any needed cleanup.
+ SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end());
+
+ // Canonicalize shuffle v, v -> v, undef
+ if (N1 == N2) {
+ N2 = getUNDEF(VT);
+ for (int i = 0; i != NElts; ++i)
+ if (MaskVec[i] >= NElts) MaskVec[i] -= NElts;
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N1.isUndef())
+ commuteShuffle(N1, N2, MaskVec);
+
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+
+  // Canonicalize all indices into lhs -> shuffle lhs, undef
+  // Canonicalize all indices into rhs -> shuffle rhs, undef
+ bool AllLHS = true, AllRHS = true;
+ bool N2Undef = N2.isUndef();
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= NElts) {
+ if (N2Undef)
+ MaskVec[i] = -1;
+ else
+ AllLHS = false;
+ } else if (MaskVec[i] >= 0) {
+ AllRHS = false;
+ }
+ }
+ if (AllLHS && AllRHS)
+ return getUNDEF(VT);
+ if (AllLHS && !N2Undef)
+ N2 = getUNDEF(VT);
+ if (AllRHS) {
+ N1 = getUNDEF(VT);
+ commuteShuffle(N1, N2, MaskVec);
+ }
+ // Reset our undef status after accounting for the mask.
+ N2Undef = N2.isUndef();
+ // Re-check whether both sides ended up undef.
+ if (N1.isUndef() && N2Undef)
+ return getUNDEF(VT);
+
+  // If this is an identity shuffle, return the first operand.
+ bool Identity = true, AllSame = true;
+ for (int i = 0; i != NElts; ++i) {
+ if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false;
+ if (MaskVec[i] != MaskVec[0]) AllSame = false;
+ }
+ if (Identity && NElts)
+ return N1;
+
+ // Shuffling a constant splat doesn't change the result.
+ if (N2Undef) {
+ SDValue V = N1;
+
+ // Look through any bitcasts. We check that these don't change the number
+    // (and size) of elements and just change their types.
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ // A splat should always show up as a build vector node.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ // If this is a splat of an undef, shuffling it is also undef.
+ if (Splat && Splat.isUndef())
+ return getUNDEF(VT);
+
+ bool SameNumElts =
+ V.getValueType().getVectorNumElements() == VT.getVectorNumElements();
+
+ // We only have a splat which can skip shuffles if there is a splatted
+ // value and no undef lanes rearranged by the shuffle.
+ if (Splat && UndefElements.none()) {
+ // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+ // number of elements match or the value splatted is a zero constant.
+ if (SameNumElts)
+ return N1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+ if (C->isNullValue())
+ return N1;
+ }
+
+ // If the shuffle itself creates a splat, build the vector directly.
+ if (AllSame && SameNumElts) {
+ EVT BuildVT = BV->getValueType(0);
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
+ }
+ }
+ }
+
+ FoldingSetNodeID ID;
+ SDValue Ops[2] = { N1, N2 };
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
+ for (int i = 0; i != NElts; ++i)
+ ID.AddInteger(MaskVec[i]);
+
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ // Allocate the mask array for the node out of the BumpPtrAllocator, since
+ // SDNode doesn't have access to it. This memory will be "leaked" when
+  // the node is deallocated, but recovered when the OperandAllocator is
+  // released.
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts);
+ std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc);
+
+ auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(),
+ dl.getDebugLoc(), MaskAlloc);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
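+
+// Illustrative sketch of the canonicalizations above (A is a hypothetical
+// v4i32 value): shuffling a vector with itself folds the mask onto the first
+// operand, and a mask that then selects lanes 0..3 in order is an identity
+// shuffle, so no shuffle node is created at all:
+//
+//   SDValue S = DAG.getVectorShuffle(MVT::v4i32, DL, A, A, {4, 1, 6, 3});
+//   // The mask becomes {0, 1, 2, 3} once both inputs match, so S == A.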
+
+SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
+ MVT VT = SV.getSimpleValueType(0);
+ SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
+
+ SDValue Op0 = SV.getOperand(0);
+ SDValue Op1 = SV.getOperand(1);
+ return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec);
+}
+
+SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val,
+ SDValue DTy, SDValue STy, SDValue Rnd,
+ SDValue Sat, ISD::CvtCode Code) {
+ // If the src and dest types are the same and the conversion is between
+ // integer types of the same sign or two floats, no conversion is necessary.
+ if (DTy == STy &&
+ (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF))
+ return Val;
+
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N =
+ newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
+ ID.AddInteger(RegNo);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
+ ID.AddPointer(RegMask);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<RegisterMaskSDNode>(RegMask);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
+ MCSymbol *Label) {
+ FoldingSetNodeID ID;
+ SDValue Ops[] = { Root };
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
+ ID.AddPointer(Label);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
+ int64_t Offset,
+ bool isTarget,
+ unsigned char TargetFlags) {
+ unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
+ ID.AddPointer(BA);
+ ID.AddInteger(Offset);
+ ID.AddInteger(TargetFlags);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getSrcValue(const Value *V) {
+ assert((!V || V->getType()->isPointerTy()) &&
+ "SrcValue is not a pointer?");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
+ ID.AddPointer(V);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<SrcValueSDNode>(V);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMDNode(const MDNode *MD) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
+ ID.AddPointer(MD);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<MDNodeSDNode>(MD);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
+ if (VT == V.getValueType())
+ return V;
+
+ return getNode(ISD::BITCAST, SDLoc(V), VT, V);
+}
+
+SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr,
+ unsigned SrcAS, unsigned DestAS) {
+ SDValue Ops[] = {Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
+ ID.AddInteger(SrcAS);
+ ID.AddInteger(DestAS);
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VT, SrcAS, DestAS);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+/// getShiftAmountOperand - Return the specified value cast to
+/// the target's desired shift amount type.
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) {
+ EVT OpTy = Op.getValueType();
+ EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout());
+ if (OpTy == ShTy || OpTy.isVector()) return Op;
+
+ return getZExtOrTrunc(Op, SDLoc(Op), ShTy);
+}
+
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = Node->getOperand(0);
+ SDValue Tmp2 = Node->getOperand(1);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
+ Tmp2, MachinePointerInfo(V));
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(Align - 1, dl, VAList.getValueType()));
+
+ VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+ getConstant(getDataLayout().getTypeAllocSize(
+ VT.getTypeForEVT(*getContext())),
+ dl, VAList.getValueType()));
+ // Store the incremented VAList to the legalized pointer
+ Tmp1 =
+ getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V));
+ // Load the actual argument out of the pointer VAList
+ return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo());
+}
+
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+ SDLoc dl(Node);
+ const TargetLowering &TLI = getTargetLoweringInfo();
+ // This defaults to loading a pointer from the input and storing it to the
+ // output, returning the chain.
+ const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+ const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+ SDValue Tmp1 =
+ getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0),
+ Node->getOperand(2), MachinePointerInfo(VS));
+ return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD));
+}
+
+SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ unsigned ByteSize = VT.getStoreSize();
+ Type *Ty = VT.getTypeForEVT(*getContext());
+ unsigned StackAlign =
+ std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
+
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
+}
+
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
+ unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+ Type *Ty1 = VT1.getTypeForEVT(*getContext());
+ Type *Ty2 = VT2.getTypeForEVT(*getContext());
+ const DataLayout &DL = getDataLayout();
+ unsigned Align =
+ std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
+
+ MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
+}
+
+SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
+ ISD::CondCode Cond, const SDLoc &dl) {
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return getConstant(0, dl, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: {
+ TargetLowering::BooleanContent Cnt =
+ TLI->getBooleanContents(N1->getValueType(0));
+ return getConstant(
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
+ VT);
+ }
+
+ case ISD::SETOEQ:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ case ISD::SETO:
+ case ISD::SETUO:
+ case ISD::SETUEQ:
+ case ISD::SETUNE:
+ assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ break;
+ }
+
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
+ const APInt &C2 = N2C->getAPIntValue();
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: return getConstant(C1 == C2, dl, VT);
+ case ISD::SETNE: return getConstant(C1 != C2, dl, VT);
+ case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT);
+ case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT);
+ case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT);
+ case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT);
+ case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT);
+ case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT);
+ case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT);
+ case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT);
+ }
+ }
+ }
+ if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
+ if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
+ APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ // fall through
+ case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
+ case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT);
+ case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT);
+ case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT);
+ case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT);
+ case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT);
+ case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT);
+ case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT);
+ }
+ } else {
+ // Ensure that the constant occurs on the RHS.
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ MVT CompVT = N1.getValueType().getSimpleVT();
+ if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
+ return SDValue();
+
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
+ }
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+ // This predicate is not safe for vector operations.
+ if (Op.getValueType().isVector())
+ return false;
+
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+/// this predicate to simplify operations downstream. Mask is known to be zero
+/// for bits that V cannot have.
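+/// As an illustrative usage sketch (Ptr, PtrBits and DAG are assumed names,
+/// not part of this file), a caller could test 4-byte alignment by asking
+/// whether the two low bits are known zero:
+///   APInt LowTwo = APInt::getLowBitsSet(PtrBits, 2);
+///   bool Aligned4 = DAG.MaskedValueIsZero(Ptr, LowTwo);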
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+ unsigned Depth) const {
+ APInt KnownZero, KnownOne;
+ computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ return (KnownZero & Mask) == Mask;
+}
+
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bitsets.
+void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
+ if (Depth == 6)
+ return; // Limit search depth.
+
+ APInt KnownZero2, KnownOne2;
+
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
+ break;
+ case ISD::AND:
+ // If either the LHS or the RHS are Zero, the result is zero.
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR: {
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+ KnownZero = KnownZeroOut;
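+    // For example, if the LHS is known to be 0b01?0 (bit 1 unknown) and the
+    // RHS is the constant 0b0110, then bits 3, 2 and 0 agree in both inputs,
+    // so they are known zero in the result and only bit 1 stays unknown:
+    // the XOR is known to be 0b00?0.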
+ break;
+ }
+ case ISD::MUL: {
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+
+ // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
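+    // For example, if one operand is known to be a multiple of 4 (two
+    // trailing zero bits) and the other a multiple of 8 (three trailing zero
+    // bits), TrailZ is 2 + 3 = 5 and the product is known to be a multiple
+    // of 32.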
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+ break;
+ }
+ case ISD::UDIV: {
+ // For the purposes of computing leading zeros we can conservatively
+ // treat a udiv as a logical right shift by the power of 2 known to
+ // be less than the denominator.
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ if (RHSUnknownLeadingOnes != BitWidth)
+ LeadZ = std::min(BitWidth,
+ LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ break;
+ }
+ case ISD::SELECT:
+ computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integers.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
+ case ISD::SETCC:
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
+ case ISD::SHL:
+ // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero <<= ShAmt;
+ KnownOne <<= ShAmt;
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ }
+ break;
+ case ISD::SRL:
+ // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bits.
+ APInt SignBit = APInt::getSignBit(BitWidth);
+ SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
+
+ if (KnownZero.intersects(SignBit)) {
+ KnownZero |= HighBits; // New bits are known zero.
+ } else if (KnownOne.intersects(SignBit)) {
+ KnownOne |= HighBits; // New bits are known one.
+ }
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
+
+ APInt InSignBit = APInt::getSignBit(EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
+
+ // If the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InSignBit = InSignBit.zext(BitWidth);
+ if (NewBits.getBoolValue())
+ InputDemandedBits |= InSignBit;
+
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear
+ KnownZero |= NewBits;
+ KnownOne &= ~NewBits;
+ } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
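+    // For example, with BitWidth == 32 the result is at most 32, which fits
+    // in Log2_32(32)+1 = 6 bits, so everything above the low 6 bits is zero.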
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownOne.clearAllBits();
+ break;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ // If this is a ZEXTLoad and we are looking at the loaded value.
+ if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ EVT VT = LD->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ if (LD->getExtensionType() == ISD::NON_EXTLOAD)
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
+ }
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+
+ // Note if the sign bit is known to be zero or one.
+ bool SignBitKnownZero = KnownZero.isNegative();
+ bool SignBitKnownOne = KnownOne.isNegative();
+
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero or one, the top bits match.
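+    // For example, sign-extending an i8 whose sign bit is known clear to i32
+    // makes the 24 new high bits known zero.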
+ if (SignBitKnownZero)
+ KnownZero |= NewBits;
+ else if (SignBitKnownOne)
+ KnownOne |= NewBits;
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::AssertZext: {
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
+ KnownOne &= (~KnownZero);
+ break;
+ }
+ case ISD::FGETSIGN:
+ // All bits are zero except the low bit.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
+
+ case ISD::SUB: {
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains fewer bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (CLHS->getAPIntValue().isNonNegative()) {
+ unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+ // fall through
+ case ISD::ADD:
+ case ISD::ADDE: {
+    // The low bits of the output are known zero up to the number of trailing
+    // zero bits common to both the LHS and RHS.  For example, 8+(X<<3) is
+    // known to have the low 3 bits clear.
+ // Output known-0 bits are also known if the top bits of each input are
+ // known to be clear. For example, if one input has the top 10 bits clear
+ // and the other has the top 8 bits clear, we know the top 7 bits of the
+ // output must be clear.
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
+ unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
+
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ KnownZeroHigh = std::min(KnownZeroHigh,
+ KnownZero2.countLeadingOnes());
+ KnownZeroLow = std::min(KnownZeroLow,
+ KnownZero2.countTrailingOnes());
+
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
+ if (KnownZeroHigh > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
+ break;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
+ if (KnownZeroLow >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow);
+ break;
+ }
+ case ISD::SREM:
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue().abs();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = RA - 1;
+        computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+
+ // The low bits of the first operand are unchanged by the srem.
+ KnownZero = KnownZero2 & LowBits;
+ KnownOne = KnownOne2 & LowBits;
+
+ // If the first operand is non-negative or has all low bits zero, then
+ // the upper bits are all zero.
+ if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ KnownZero |= ~LowBits;
+
+ // If the first operand is negative and not all low bits are zero, then
+ // the upper bits are all one.
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ KnownOne |= ~LowBits;
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case ISD::UREM: {
+ if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ const APInt &RA = Rem->getAPIntValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+
+ // The upper bits are all zero, the lower ones are unchanged.
+ KnownZero = KnownZero2 | ~LowBits;
+ KnownOne = KnownOne2 & LowBits;
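+        // For example, X % 8 with a 32-bit X keeps only the low 3 bits of X,
+        // so the upper 29 bits are known zero.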
+ break;
+ }
+ }
+
+ // Since the result is less than or equal to either operand, any leading
+ // zero bits in either operand must also exist in the result.
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+
+ uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ KnownOne.clearAllBits();
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ break;
+ }
+ case ISD::EXTRACT_ELEMENT: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ const unsigned Index =
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const unsigned BitWidth = Op.getValueType().getSizeInBits();
+
+ // Remove low part of known bits mask
+ KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
+ KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth);
+
+ // Remove high part of known bit mask
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+ break;
+ }
+ case ISD::BSWAP: {
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ KnownZero = KnownZero2.byteSwap();
+ KnownOne = KnownOne2.byteSwap();
+ break;
+ }
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX: {
+ APInt Op0Zero, Op0One;
+ APInt Op1Zero, Op1One;
+ computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth);
+ computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth);
+
+ KnownZero = Op0Zero & Op1Zero;
+ KnownOne = Op0One & Op1One;
+ break;
+ }
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ break;
+ }
+ break;
+
+ default:
+ if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ break;
+ // Fallthrough
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_W_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ // Allow the target to implement this method for its nodes.
+ TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ break;
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+}
+
+bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+ if (Val.getOpcode() == ISD::SHL) {
+ auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ if (C && C->getAPIntValue() == 1)
+ return true;
+ }
+
+ // Similarly, a logical right-shift of a constant sign-bit will have exactly
+ // one bit set.
+ if (Val.getOpcode() == ISD::SRL) {
+ auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ if (C && C->getAPIntValue().isSignBit())
+ return true;
+ }
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
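+  // That succeeds when exactly one bit is known one and every other bit is
+  // known zero, e.g. for a fully-known constant such as 0b00010000.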
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+ APInt KnownZero, KnownOne;
+ computeKnownBits(Val, KnownZero, KnownOne);
+ return (KnownZero.countPopulation() == BitWidth - 1) &&
+ (KnownOne.countPopulation() == 1);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && "Invalid VT!");
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned Tmp, Tmp2;
+ unsigned FirstAnswer = 1;
+
+ if (Depth == 6)
+ return 1; // Limit search depth.
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::AssertSext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::AssertZext:
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+ return VTBits-Tmp;
+
+ case ISD::Constant: {
+ const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
+ case ISD::SIGN_EXTEND:
+ Tmp =
+ VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+ case ISD::SIGN_EXTEND_INREG:
+ // Max of the input and what this extends.
+ Tmp =
+ cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = VTBits-Tmp+1;
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ return std::max(Tmp, Tmp2);
+
+ case ISD::SRA:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ // SRA X, C -> adds C sign bits.
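+    // For example, if X has at least 3 sign bits, (sra X, 4) has at least 7,
+    // capped at the bit width.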
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Tmp += C->getZExtValue();
+ if (Tmp > VTBits) Tmp = VTBits;
+ }
+ return Tmp;
+ case ISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ // shl destroys sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (C->getZExtValue() >= VTBits || // Bad shift.
+ C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - C->getZExtValue();
+ }
+ break;
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: // NOT is handled here.
+ // Logical binary ops preserve the number of sign bits at the worst.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp != 1) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ FirstAnswer = std::min(Tmp, Tmp2);
+ // We computed what we know about the sign bits as our first
+ // answer. Now proceed to the generic code that uses
+ // computeKnownBits, and pick whichever answer is better.
+ }
+ break;
+
+ case ISD::SELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SELECT_CC:
+ Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ return std::min(Tmp, Tmp2);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ if (Op.getResNo() != 1)
+ break;
+ // The boolean result conforms to getBooleanContents. Fall through.
+ // If setcc returns 0/-1, all bits are sign bits.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integers.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::SETCC:
+ // If setcc returns 0/-1, all bits are sign bits.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+ // Handle rotate right by N like a rotate left by 32-N.
+ if (Op.getOpcode() == ISD::ROTR)
+ RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+ // If we aren't rotating out all of the known-in sign bits, return the
+ // number that are left. This handles rotl(sext(x), 1) for example.
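+      // E.g. an i32 sign-extended from i8 has 25 identical top bits; rotating
+      // left by 1 still leaves 24 of them in place.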
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ }
+ break;
+ case ISD::ADD:
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+
+ // Special case decrementing a value (ADD X, -1):
+ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ if (CRHS->isAllOnesValue()) {
+ APInt KnownZero, KnownOne;
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ return VTBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (KnownZero.isNegative())
+ return Tmp;
+ }
+
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+ return std::min(Tmp, Tmp2)-1;
+
+ case ISD::SUB:
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ if (Tmp2 == 1) return 1;
+
+ // Handle NEG.
+ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (CLHS->isNullValue()) {
+ APInt KnownZero, KnownOne;
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
+ return VTBits;
+
+ // If the input is known to be positive (the sign bit is known clear),
+ // the output of the NEG has the same number of sign bits as the input.
+ if (KnownZero.isNegative())
+ return Tmp2;
+
+ // Otherwise, we treat this like a SUB.
+ }
+
+ // Sub can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ if (Tmp == 1) return 1; // Early out.
+ return std::min(Tmp, Tmp2)-1;
+ case ISD::TRUNCATE:
+ // FIXME: it's tricky to do anything useful for this, but it is an important
+ // case for targets like X86.
+ break;
+ case ISD::EXTRACT_ELEMENT: {
+ const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ const int BitWidth = Op.getValueType().getSizeInBits();
+ const int Items =
+ Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+
+    // Get the reverse index (counting from the big end); the Op1 value
+    // indexes elements from the little end, while the sign sits at the
+    // big end.
+ const int rIndex = Items - 1 -
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+    // If the sign portion ends in our element, the subtraction gives the
+    // correct result. Otherwise it gives either a negative or a
+    // greater-than-bitwidth result, which is clamped below.
+ return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
+ }
+ }
+
+ // If we are looking at the loaded value of the SDNode.
+ if (Op.getResNo() == 0) {
+ // Handle LOADX separately here. EXTLOAD case will fallthrough.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
+ unsigned ExtType = LD->getExtensionType();
+ switch (ExtType) {
+ default: break;
+ case ISD::SEXTLOAD: // '17' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp+1;
+ case ISD::ZEXTLOAD: // '16' bits known
+ Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ return VTBits-Tmp;
+ }
+ }
+ }
+
+ // Allow the target to implement this method for its nodes.
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) {
+ unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
+ if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ }
+
+ // Finally, if we can prove that the top bits of the result are 0's or 1's,
+ // use this information.
+ APInt KnownZero, KnownOne;
+ computeKnownBits(Op, KnownZero, KnownOne, Depth);
+
+ APInt Mask;
+ if (KnownZero.isNegative()) { // sign bit is 0
+ Mask = KnownZero;
+ } else if (KnownOne.isNegative()) { // sign bit is 1;
+ Mask = KnownOne;
+ } else {
+ // Nothing known.
+ return FirstAnswer;
+ }
+
+ // Okay, we know that the sign bit in Mask is set. Use CLZ to determine
+ // the number of identical bits in the top of the input value.
+ Mask = ~Mask;
+ Mask <<= Mask.getBitWidth()-VTBits;
+ // Return # leading zeros. We use 'min' here in case Val was zero before
+ // shifting. We don't want to return '64' as for an i32 "0".
+ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
+
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
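+  // An OR only acts like an ADD when the constant touches no bits the base
+  // could have set; e.g. (or (shl X, 4), 12) is equivalent to
+  // (add (shl X, 4), 12) because the shift leaves the low four bits zero.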
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+ // If we're told that NaNs won't happen, assume they won't.
+ if (getTarget().Options.NoNaNsFPMath)
+ return true;
+
+ // If the value is a constant, we can obviously see if it is a NaN or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->getValueAPF().isNaN();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return !C->isNullValue();
+ break;
+ }
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+  // For negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
+bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
+ assert(A.getValueType() == B.getValueType() &&
+ "Values must have the same type");
+ APInt AZero, AOne;
+ APInt BZero, BOne;
+ computeKnownBits(A, AZero, AOne);
+ computeKnownBits(B, BZero, BOne);
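+  // Every bit must be known zero in at least one of the two values, e.g.
+  // (shl X, 8) and (and Y, 0xff) can never have a common bit set.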
+ return (AZero | BZero).isAllOnesValue();
+}
+
+static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ llvm::SelectionDAG &DAG) {
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ // Concat of UNDEFs is UNDEF.
+ if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
+ return DAG.getUNDEF(VT);
+
+ // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
+ // simplified to one big BUILD_VECTOR.
+ // FIXME: Add support for SCALAR_TO_VECTOR as well.
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 16> Elts;
+ for (SDValue Op : Ops) {
+ EVT OpVT = Op.getValueType();
+ if (Op.isUndef())
+ Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
+ else if (Op.getOpcode() == ISD::BUILD_VECTOR)
+ Elts.append(Op->op_begin(), Op->op_end());
+ else
+ return SDValue();
+ }
+
+ // BUILD_VECTOR requires all inputs to be of the same type, find the
+ // maximum type and extend them all.
+ for (SDValue Op : Elts)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+
+ if (SVT.bitsGT(VT.getScalarType()))
+ for (SDValue &Op : Elts)
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
+}
+
+/// Gets or creates the specified node.
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(),
+ getVTList(VT));
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
+  // Constant fold unary operations with an integer constant operand. Even
+  // opaque constants will be folded, because the folding of unary operations
+ // doesn't create new constants with different values. Nevertheless, the
+ // opaque flag is preserved during folding to prevent future folding with
+ // other constants.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
+ const APInt &Val = C->getAPIntValue();
+ switch (Opcode) {
+ default: break;
+ case ISD::SIGN_EXTEND:
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::TRUNCATE:
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
+ C->isTargetOpcode(), C->isOpaque());
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: {
+ APFloat apf(EVTToAPFloatSemantics(VT),
+ APInt::getNullValue(VT.getSizeInBits()));
+ (void)apf.convertFromAPInt(Val,
+ Opcode==ISD::SINT_TO_FP,
+ APFloat::rmNearestTiesToEven);
+ return getConstantFP(apf, DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
+ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
+ if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+ return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
+ if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+ return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
+ if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
+ return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT);
+ break;
+ case ISD::BSWAP:
+ return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTPOP:
+ return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ }
+ }
+
+ // Constant fold unary operations with a floating point constant operand.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
+ APFloat V = C->getValueAPF(); // make copy
+ switch (Opcode) {
+ case ISD::FNEG:
+ V.changeSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FABS:
+ V.clearSign();
+ return getConstantFP(V, DL, VT);
+ case ISD::FCEIL: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FTRUNC: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FFLOOR: {
+ APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative);
+ if (fs == APFloat::opOK || fs == APFloat::opInexact)
+ return getConstantFP(V, DL, VT);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ integerPart x[2];
+ bool ignored;
+ static_assert(integerPartWidth >= 64, "APFloat parts too small!");
+ // FIXME need to be more flexible about rounding mode.
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+ Opcode==ISD::FP_TO_SINT,
+ APFloat::rmTowardZero, &ignored);
+ if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ break;
+ APInt api(VT.getSizeInBits(), x);
+ return getConstant(api, DL, VT);
+ }
+ case ISD::BITCAST:
+ if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
+ return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+ return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT);
+ else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+ return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
+ break;
+ }
+ }
+
+ // Constant fold unary operations with a vector integer or float operand.
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
+ if (BV->isConstant()) {
+ switch (Opcode) {
+ default:
+ // FIXME: Entirely reasonable to perform folding of other unary
+ // operations here as the need arises.
+ break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP: {
+ SDValue Ops = { Operand };
+ if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return Fold;
+ }
+ }
+ }
+ }
+
+ unsigned OpOpcode = Operand.getNode()->getOpcode();
+ switch (Opcode) {
+ case ISD::TokenFactor:
+ case ISD::MERGE_VALUES:
+ case ISD::CONCAT_VECTORS:
+ return Operand; // Factor, merge or concat of one node? No need.
+ case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+ case ISD::FP_EXTEND:
+ assert(VT.isFloatingPoint() &&
+ Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+ if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid fpext node, dst < src!");
+ if (Operand.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::SIGN_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid SIGN_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid sext node, dst < src!");
+ if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // sext(undef) = 0, because the top bits will all be the same.
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::ZERO_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ZERO_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT,
+ Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ // zext(undef) = 0, because the top bits will be zero.
+ return getConstant(0, DL, VT);
+ break;
+ case ISD::ANY_EXTEND:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid ANY_EXTEND!");
+ if (Operand.getValueType() == VT) return Operand; // noop extension
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsLT(VT) &&
+ "Invalid anyext node, dst < src!");
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
+ // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ else if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
+    // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
+ break;
+ case ISD::TRUNCATE:
+ assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ "Invalid TRUNCATE!");
+ if (Operand.getValueType() == VT) return Operand; // noop truncate
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Operand.getValueType().bitsGT(VT) &&
+ "Invalid truncate node, src < dst!");
+ if (OpOpcode == ISD::TRUNCATE)
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
+ // If the source is smaller than the dest, we still need an extend.
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ return Operand.getNode()->getOperand(0);
+ }
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BSWAP:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid BSWAP!");
+ assert((VT.getScalarSizeInBits() % 16 == 0) &&
+ "BSWAP types must be a multiple of 16 bits!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITREVERSE:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid BITREVERSE!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::BITCAST:
+ // Basic sanity checking.
+ assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
+ && "Cannot BITCAST between types of different sizes!");
+ if (VT == Operand.getValueType()) return Operand; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
+ case ISD::SCALAR_TO_VECTOR:
+ assert(VT.isVector() && !Operand.getValueType().isVector() &&
+ (VT.getVectorElementType() == Operand.getValueType() ||
+ (VT.getVectorElementType().isInteger() &&
+ Operand.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ "Illegal SCALAR_TO_VECTOR node!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+ if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Operand.getOperand(1)) &&
+ Operand.getConstantOperandVal(1) == 0 &&
+ Operand.getOperand(0).getValueType() == VT)
+ return Operand.getOperand(0);
+ break;
+ case ISD::FNEG:
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
+ // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
+ return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+ Operand.getNode()->getOperand(0),
+ &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags);
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return Operand.getNode()->getOperand(0);
+ break;
+ case ISD::FABS:
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ break;
+ }
+
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {Operand};
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
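+/// Try to constant fold a binary integer operation on two APInts.  The bool
+/// in the returned pair is false when no fold is possible (unknown opcode, or
+/// division/remainder by zero); e.g. an ISD::SRL of 0x80 by 3 folds to
+/// {0x10, true}.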
+static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
+ switch (Opcode) {
+ case ISD::ADD: return std::make_pair(C1 + C2, true);
+ case ISD::SUB: return std::make_pair(C1 - C2, true);
+ case ISD::MUL: return std::make_pair(C1 * C2, true);
+ case ISD::AND: return std::make_pair(C1 & C2, true);
+ case ISD::OR: return std::make_pair(C1 | C2, true);
+ case ISD::XOR: return std::make_pair(C1 ^ C2, true);
+ case ISD::SHL: return std::make_pair(C1 << C2, true);
+ case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
+ case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
+ case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
+ case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
+ case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
+ case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
+ case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
+ case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
+ case ISD::UDIV:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.udiv(C2), true);
+ case ISD::UREM:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.urem(C2), true);
+ case ISD::SDIV:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.sdiv(C2), true);
+ case ISD::SREM:
+ if (!C2.getBoolValue())
+ break;
+ return std::make_pair(C1.srem(C2), true);
+ }
+ return std::make_pair(APInt(1, 0), false);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, const ConstantSDNode *Cst1,
+ const ConstantSDNode *Cst2) {
+ if (Cst1->isOpaque() || Cst2->isOpaque())
+ return SDValue();
+
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
+ Cst2->getAPIntValue());
+ if (!Folded.second)
+ return SDValue();
+ return getConstant(Folded.first, DL, VT);
+}
+
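+/// Fold an ADD or SUB of a global address and a constant into a single
+/// global address with a combined offset, e.g. (add @g+8, 16) becomes @g+24,
+/// provided the target considers offset folding legal for @g.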
+SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
+ const GlobalAddressSDNode *GA,
+ const SDNode *N2) {
+ if (GA->getOpcode() != ISD::GlobalAddress)
+ return SDValue();
+ if (!TLI->isOffsetFoldingLegal(GA))
+ return SDValue();
+ const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
+ if (!Cst2)
+ return SDValue();
+ int64_t Offset = Cst2->getSExtValue();
+ switch (Opcode) {
+ case ISD::ADD: break;
+ case ISD::SUB: Offset = -uint64_t(Offset); break;
+ default: return SDValue();
+ }
+ return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
+ GA->getOffset() + uint64_t(Offset));
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDNode *Cst1,
+ SDNode *Cst2) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ // Handle the case of two scalars.
+ if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
+ if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
+ SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
+      assert((!Folded || !VT.isVector()) &&
+             "Can't fold vector ops with scalar operands");
+ return Folded;
+ }
+ }
+
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
+ return FoldSymbolOffset(Opcode, VT, GA, Cst2);
+ if (isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
+ return FoldSymbolOffset(Opcode, VT, GA, Cst1);
+
+ // For vectors extract each constant element into Inputs so we can constant
+ // fold them individually.
+ BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
+ BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
+ if (!BV1 || !BV2)
+ return SDValue();
+
+ assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
+
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 4> Outputs;
+ for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
+ ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
+ ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
+ if (!V1 || !V2) // Not a constant, bail.
+ return SDValue();
+
+ if (V1->isOpaque() || V2->isOpaque())
+ return SDValue();
+
+ // Avoid BUILD_VECTOR nodes that perform implicit truncation.
+ // FIXME: This is valid and could be handled by truncating the APInts.
+ if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
+ return SDValue();
+
+ // Fold one vector element.
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(),
+ V2->getAPIntValue());
+ if (!Folded.second)
+ return SDValue();
+ Outputs.push_back(getConstant(Folded.first, DL, SVT));
+ }
+
+ assert(VT.getVectorNumElements() == Outputs.size() &&
+ "Vector size mismatch!");
+
+ // We may have a vector type but a scalar result. Create a splat.
+ Outputs.resize(VT.getVectorNumElements(), Outputs.back());
+
+ // Build a big vector out of the scalar elements we generated.
+ return getBuildVector(VT, SDLoc(), Outputs);
+}
+
+SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
+ const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
+ // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ if (!VT.isVector())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+ return !Op.getValueType().isVector() ||
+ Op.getValueType().getVectorNumElements() == NumElts;
+ };
+
+ auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
+ (BV && BV->isConstant());
+ };
+
+ // All operands must be vector types with the same number of elements as
+ // the result type and must be either UNDEF or a build vector of constant
+ // or UNDEF scalars.
+ if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
+ !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
+ return SDValue();
+
+  // If we are comparing vectors, then the result needs to be an i1 boolean
+ // that is then sign-extended back to the legal result type.
+ EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+
+  // Find a legal integer scalar type for constant promotion and
+  // ensure that its scalar size is at least as large as the source.
+ EVT LegalSVT = VT.getScalarType();
+ if (LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(VT.getScalarType()))
+ return SDValue();
+ }
+
+ // Constant fold each scalar lane separately.
+ SmallVector<SDValue, 4> ScalarResults;
+ for (unsigned i = 0; i != NumElts; i++) {
+ SmallVector<SDValue, 4> ScalarOps;
+ for (SDValue Op : Ops) {
+ EVT InSVT = Op.getValueType().getScalarType();
+ BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
+ if (!InBV) {
+ // We've checked that this is UNDEF or a constant of some kind.
+ if (Op.isUndef())
+ ScalarOps.push_back(getUNDEF(InSVT));
+ else
+ ScalarOps.push_back(Op);
+ continue;
+ }
+
+ SDValue ScalarOp = InBV->getOperand(i);
+ EVT ScalarVT = ScalarOp.getValueType();
+
+ // Build vector (integer) scalar operands may need implicit
+ // truncation - do this before constant folding.
+ if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+ ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+
+ ScalarOps.push_back(ScalarOp);
+ }
+
+ // Constant fold the scalar operands.
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+
+ // Legalize the (integer) scalar constant if necessary.
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
+ return SDValue();
+ ScalarResults.push_back(ScalarResult);
+ }
+
+ return getBuildVector(VT, DL, ScalarResults);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2,
+ const SDNodeFlags *Flags) {
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ // Canonicalize constant to RHS if commutative.
+ if (isCommutativeBinOp(Opcode)) {
+ if (N1C && !N2C) {
+ std::swap(N1C, N2C);
+ std::swap(N1, N2);
+ } else if (N1CFP && !N2CFP) {
+ std::swap(N1CFP, N2CFP);
+ std::swap(N1, N2);
+ }
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::TokenFactor:
+ assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+ N2.getValueType() == MVT::Other && "Invalid token factor!");
+ // Fold trivial token factors.
+ if (N1.getOpcode() == ISD::EntryToken) return N2;
+ if (N2.getOpcode() == ISD::EntryToken) return N1;
+ if (N1 == N2) return N1;
+ break;
+ case ISD::CONCAT_VECTORS: {
+ // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
+ SDValue Ops[] = {N1, N2};
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::AND:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
+ // worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N2;
+ if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ return N1;
+ break;
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
+ // it's worth handling here.
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ case ISD::MULHU:
+ case ISD::MULHS:
+ case ISD::MUL:
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath) {
+ if (Opcode == ISD::FADD) {
+ // x+0 --> x
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FSUB) {
+ // x-0 --> x
+ if (N2CFP && N2CFP->getValueAPF().isZero())
+ return N1;
+ } else if (Opcode == ISD::FMUL) {
+ // x*0 --> 0
+ if (N2CFP && N2CFP->isZero())
+ return N2;
+ // x*1 --> x
+ if (N2CFP && N2CFP->isExactlyValue(1.0))
+ return N1;
+ }
+ }
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
+ case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
+ assert(N1.getValueType() == VT &&
+ N1.getValueType().isFloatingPoint() &&
+ N2.getValueType().isFloatingPoint() &&
+ "Invalid FCOPYSIGN!");
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(VT == N1.getValueType() &&
+ "Shift operators return type must be the same as their first arg");
+ assert(VT.isInteger() && N2.getValueType().isInteger() &&
+ "Shifts only work on integers");
+    assert((!VT.isVector() || VT == N2.getValueType()) &&
+           "Vector shift amounts must be in the same type as their first arg");
+    // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
+
+ // Always fold shifts of i1 values so the code generator doesn't need to
+ // handle them. Since we know the size of the shift has to be less than the
+ // size of the value, the shift/rotate count is guaranteed to be zero.
+ if (VT == MVT::i1)
+ return N1;
+ if (N2C && N2C->isNullValue())
+ return N1;
+ break;
+ case ISD::FP_ROUND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg round!");
+ assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+ "Cannot FP_ROUND_INREG integer types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "FP_ROUND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in FP_ROUND_INREG");
+ assert(EVT.bitsLE(VT) && "Not rounding down!");
+ (void)EVT;
+ if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
+ break;
+ }
+ case ISD::FP_ROUND:
+ assert(VT.isFloatingPoint() &&
+ N1.getValueType().isFloatingPoint() &&
+ VT.bitsLE(N1.getValueType()) &&
+ N2C && "Invalid FP_ROUND!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ break;
+ case ISD::AssertSext:
+ case ISD::AssertZext: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (VT == EVT) return N1; // noop assertion.
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT EVT = cast<VTSDNode>(N2)->getVT();
+ assert(VT == N1.getValueType() && "Not an inreg extend!");
+ assert(VT.isInteger() && EVT.isInteger() &&
+ "Cannot *_EXTEND_INREG FP types");
+ assert(EVT.isVector() == VT.isVector() &&
+ "SIGN_EXTEND_INREG type should be vector iff the operand "
+ "type is vector!");
+ assert((!EVT.isVector() ||
+ EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+ "Vector element counts must match in SIGN_EXTEND_INREG");
+ assert(EVT.bitsLE(VT) && "Not extending!");
+ if (EVT == VT) return N1; // Not actually extending
+
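+ // Helper that sign-extends the low FromBits bits of Val and returns it as a
+ // constant of the scalar type; e.g. with VT == i32 and EVT == i8,
+ // 0x000000FF becomes 0xFFFFFFFF (shift left by 24, then arithmetic shift
+ // right by 24).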
+ auto SignExtendInReg = [&](APInt Val) {
+ unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ Val <<= Val.getBitWidth() - FromBits;
+ Val = Val.ashr(Val.getBitWidth() - FromBits);
+ return getConstant(Val, DL, VT.getScalarType());
+ };
+
+ if (N1C) {
+ const APInt &Val = N1C->getAPIntValue();
+ return SignExtendInReg(Val);
+ }
+ if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
+ SmallVector<SDValue, 8> Ops;
+ for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ SDValue Op = N1.getOperand(i);
+ if (Op.isUndef()) {
+ Ops.push_back(getUNDEF(VT.getScalarType()));
+ continue;
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ APInt Val = C->getAPIntValue();
+ Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
+ Ops.push_back(SignExtendInReg(Val));
+ continue;
+ }
+ break;
+ }
+ if (Ops.size() == VT.getVectorNumElements())
+ return getBuildVector(VT, DL, Ops);
+ }
+ break;
+ }
+ case ISD::EXTRACT_VECTOR_ELT:
+ // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
+ if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+
+ // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+ // expanding copies of large vectors from registers.
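+ // For example, extracting element 5 from a concatenation of two v4i32
+ // vectors becomes an extract of element 5 % 4 == 1 from operand 5 / 4 == 1.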
+ if (N2C &&
+ N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0) {
+ unsigned Factor =
+ N1.getOperand(0).getValueType().getVectorNumElements();
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ N1.getOperand(N2C->getZExtValue() / Factor),
+ getConstant(N2C->getZExtValue() % Factor, DL,
+ N2.getValueType()));
+ }
+
+ // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+ // expanding large vector constants.
+ if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue Elt = N1.getOperand(N2C->getZExtValue());
+
+ if (VT != Elt.getValueType())
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated, and the result implicitly
+ // extended. Make that explicit here.
+ Elt = getAnyExtOrTrunc(Elt, DL, VT);
+
+ return Elt;
+ }
+
+ // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+ // operations are lowered to scalars.
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ // If the indices are the same, return the inserted element else
+ // if the indices are known different, extract the element from
+ // the original vector.
+ SDValue N1Op2 = N1.getOperand(2);
+ ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2);
+
+ if (N1Op2C && N2C) {
+ if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+ if (VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
+ else
+ return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+ }
+
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+ }
+ }
+ break;
+ case ISD::EXTRACT_ELEMENT:
+ assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+ assert(!N1.getValueType().isVector() && !VT.isVector() &&
+ (N1.getValueType().isInteger() == VT.isInteger()) &&
+ N1.getValueType() != VT &&
+ "Wrong types for EXTRACT_ELEMENT!");
+
+ // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding
+ // 64-bit integers into 32-bit parts. Instead of building the extract of
+ // the BUILD_PAIR, only to have legalize rip it apart, just do it now.
+ if (N1.getOpcode() == ISD::BUILD_PAIR)
+ return N1.getOperand(N2C->getZExtValue());
+
+ // EXTRACT_ELEMENT of a constant int is also very common.
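+ // For example, extracting element 1 of an i64 constant as an i32 shifts the
+ // constant right by 32 bits and truncates the result to 32 bits.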
+ if (N1C) {
+ unsigned ElementSize = VT.getSizeInBits();
+ unsigned Shift = ElementSize * N2C->getZExtValue();
+ APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift);
+ return getConstant(ShiftedVal.trunc(ElementSize), DL, VT);
+ }
+ break;
+ case ISD::EXTRACT_SUBVECTOR:
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() ==
+ N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getSimpleValueType() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (N2C) {
+ assert((VT.getVectorNumElements() + N2C->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getSimpleValueType())
+ return N1;
+ }
+ break;
+ }
+
+ // Perform trivial constant folding.
+ if (SDValue SV =
+ FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
+ return SV;
+
+ // Constant fold FP operations.
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions();
+ if (N1CFP) {
+ if (N2CFP) {
+ APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+ APFloat::opStatus s;
+ switch (Opcode) {
+ case ISD::FADD:
+ s = V1.add(V2, APFloat::rmNearestTiesToEven);
+ if (!HasFPExceptions || s != APFloat::opInvalidOp)
+ return getConstantFP(V1, DL, VT);
+ break;
+ case ISD::FSUB:
+ s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+ if (!HasFPExceptions || s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, DL, VT);
+ break;
+ case ISD::FMUL:
+ s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+ if (!HasFPExceptions || s!=APFloat::opInvalidOp)
+ return getConstantFP(V1, DL, VT);
+ break;
+ case ISD::FDIV:
+ s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+ if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
+ s!=APFloat::opDivByZero)) {
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ case ISD::FREM :
+ s = V1.mod(V2);
+ if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
+ s!=APFloat::opDivByZero)) {
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ case ISD::FCOPYSIGN:
+ V1.copySign(V2);
+ return getConstantFP(V1, DL, VT);
+ default: break;
+ }
+ }
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, DL, VT);
+ }
+ }
+
+ // Canonicalize an UNDEF to the RHS, even over a constant.
+ if (N1.isUndef()) {
+ if (isCommutativeBinOp(Opcode)) {
+ std::swap(N1, N2);
+ } else {
+ switch (Opcode) {
+ case ISD::FP_ROUND_INREG:
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::SUB:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SRA:
+ return N1; // fold op(undef, arg2) -> undef
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the other operand.
+ return N2;
+ }
+ }
+ }
+
+ // Fold a bunch of operators when the RHS is undef.
+ if (N2.isUndef()) {
+ switch (Opcode) {
+ case ISD::XOR:
+ if (N1.isUndef())
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return getConstant(0, DL, VT);
+ // fallthrough
+ case ISD::ADD:
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::UDIV:
+ case ISD::SDIV:
+ case ISD::UREM:
+ case ISD::SREM:
+ return N2; // fold op(arg1, undef) -> undef
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ if (getTarget().Options.UnsafeFPMath)
+ return N2;
+ break;
+ case ISD::MUL:
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SHL:
+ if (!VT.isVector())
+ return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0
+ // For vectors, we can't easily build an all zero vector, just return
+ // the LHS.
+ return N1;
+ case ISD::OR:
+ if (!VT.isVector())
+ return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
+ // For vectors, we can't easily build an all one vector, just return
+ // the LHS.
+ return N1;
+ case ISD::SRA:
+ return N1;
+ }
+ }
+
+ // Memoize this node if possible.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ if (VT != MVT::Glue) {
+ SDValue Ops[] = {N1, N2};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ if (Flags)
+ E->intersectFlagsWith(Flags);
+ return SDValue(E, 0);
+ }
+
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
+ }
+
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ // Perform various simplifications.
+ switch (Opcode) {
+ case ISD::FMA: {
+ ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+ ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3);
+ if (N1CFP && N2CFP && N3CFP) {
+ APFloat V1 = N1CFP->getValueAPF();
+ const APFloat &V2 = N2CFP->getValueAPF();
+ const APFloat &V3 = N3CFP->getValueAPF();
+ APFloat::opStatus s =
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
+ return getConstantFP(V1, DL, VT);
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::SETCC: {
+ // Use FoldSetCC to simplify SETCC's.
+ if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
+ return V;
+ // Vector constant folding.
+ SDValue Ops[] = {N1, N2, N3};
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ return V;
+ break;
+ }
+ case ISD::SELECT:
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
+ if (N1C->getZExtValue())
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
+ }
+
+ if (N2 == N3) return N2; // select C, X, X -> X
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getSimpleValueType() <= N1.getSimpleValueType() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index)) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index)->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getSimpleValueType())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
+ // Fold bit_convert nodes from a type to themselves.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
+ }
+
+ // Memoize node if it doesn't produce a flag.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = {N1, N2, N3};
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VT, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VT, Ops);
+}
+
+/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+/// the incoming stack arguments to be loaded from the stack.
+SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
+ SmallVector<SDValue, 8> ArgChains;
+
+ // Include the original chain at the beginning of the list. When this is
+ // used by target LowerCall hooks, this helps legalize find the
+ // CALLSEQ_BEGIN node.
+ ArgChains.push_back(Chain);
+
+ // Add a chain value for each stack argument.
+ for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
+ UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
+ if (FI->getIndex() < 0)
+ ArgChains.push_back(SDValue(L, 1));
+
+ // Build a tokenfactor for all the chains.
+ return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
+}
+
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ assert(!Value.isUndef());
+
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ assert(C->getAPIntValue().getBitWidth() == 8);
+ APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
+ if (VT.isInteger())
+ return DAG.getConstant(Val, dl, VT);
+ return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
+ VT);
+ }
+
+ assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
+ EVT IntVT = VT.getScalarType();
+ if (!IntVT.isInteger())
+ IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
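+ // For example, with NumBits == 32 and a fill byte of 0xAB:
+ // 0xAB * 0x01010101 == 0xABABABAB.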
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
+ DAG.getConstant(Magic, dl, IntVT));
+ }
+
+ if (VT != Value.getValueType() && !VT.isInteger())
+ Value = DAG.getBitcast(VT.getScalarType(), Value);
+ if (VT != Value.getValueType())
+ Value = DAG.getSplatBuildVector(VT, dl, Value);
+
+ return Value;
+}
+
+/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
+/// used when a memcpy is turned into a memset when the source is a constant
+/// string ptr.
+static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
+ const TargetLowering &TLI, StringRef Str) {
+ // Handle vector with all elements zero.
+ if (Str.empty()) {
+ if (VT.isInteger())
+ return DAG.getConstant(0, dl, VT);
+ else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
+ return DAG.getConstantFP(0.0, dl, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getConstant(0, dl,
+ EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
+ }
+
+ assert(!VT.isVector() && "Can't handle vector type here!");
+ unsigned NumVTBits = VT.getSizeInBits();
+ unsigned NumVTBytes = NumVTBits / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
+ APInt Val(NumVTBits, 0);
+ if (DAG.getDataLayout().isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
+ }
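+ // For example, packing "abcd" into an i32 yields 0x64636261 on a
+ // little-endian target and 0x61626364 on a big-endian one.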
+
+ // If the "cost" of materializing the integer immediate is less than the cost
+ // of a load, then it is cost effective to turn the load into the immediate.
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
+ return DAG.getConstant(Val, dl, VT);
+ return SDValue(nullptr, 0);
+}
+
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
+ const SDLoc &DL) {
+ EVT VT = Base.getValueType();
+ return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
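+/// Recognizes either a plain GlobalAddress or (GlobalAddress + Constant); in
+/// the latter case the constant delta is folded into the offset passed to
+/// getConstantStringInfo.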
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
+ uint64_t SrcDelta = 0;
+ GlobalAddressSDNode *G = nullptr;
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+ }
+ if (!G)
+ return false;
+
+ return getConstantStringInfo(G->getGlobal(), Str,
+ SrcDelta + G->getOffset(), false);
+}
+
+/// Determines the optimal series of memory ops to replace the memset / memcpy.
+/// Return true if the number of memory ops is below the threshold (Limit).
+/// It returns the types of the sequence of memory ops to perform
+/// memset / memcpy by reference.
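+/// For example, a 15-byte operation on a target whose optimal type is i32 is
+/// decomposed into i32, i32, i32, i16, i8; overlapping unaligned pieces are
+/// only considered for types of 64 bits or more.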
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ unsigned DstAS, unsigned SrcAS,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
+ DAG.getMachineFunction());
+
+ if (VT == MVT::Other) {
+ if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) {
+ VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS);
+ } else {
+ switch (DstAlign & 7) {
+ case 0: VT = MVT::i64; break;
+ case 4: VT = MVT::i32; break;
+ case 2: VT = MVT::i16; break;
+ default: VT = MVT::i8; break;
+ }
+ }
+
+ MVT LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ TLI.isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ TLI.isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ // FIXME: Only does this for 64-bit or more since we don't have proper
+ // cost model for unaligned load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap &&
+ VTSize >= 8 && NewVTSize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction()->optForMinSize();
+ return MF.getFunction()->optForSize();
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align,
+ bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memcpy of undef to nop.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memcpy to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than maybe a humongous number of loads and stores.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ StringRef Str;
+ bool CopyFromStr = isMemSrcFromString(Src, Str);
+ bool isZeroStr = CopyFromStr && Str.empty();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ (isZeroStr ? 0 : SrcAlign),
+ false, false, CopyFromStr, true,
+ DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(),
+ DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Align &&
+ DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign))
+ NewAlign /= 2;
+
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ uint64_t SrcOff = 0, DstOff = 0;
+ for (unsigned i = 0; i != NumMemOps; ++i) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value, Store;
+
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
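+ // For example, if a 15-byte copy was lowered as two i64 pieces, the second
+ // load/store is moved back one byte to cover bytes 7-14, overlapping byte 7
+ // of the first.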
+ assert(i == NumMemOps-1 && i != 0);
+ SrcOff -= VTSize - Size;
+ DstOff -= VTSize - Size;
+ }
+
+ if (CopyFromStr &&
+ (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
+ // It's unlikely a store of a vector immediate can be done in a single
+ // instruction. It would require a load from a constantpool first.
+ // We only handle zero vectors here.
+ // FIXME: Handle other cases where store of vector immediate is done in
+ // a single instruction.
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
+ if (Value.getNode())
+ Store = DAG.getStore(Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ }
+
+ if (!Store.getNode()) {
+ // The type might not be legal for the target. This should only happen
+ // if the type is smaller than a legal type, as on PPC, so the right
+ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify
+ // to Load/Store if NVT==VT.
+ // FIXME does the case above also need this?
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.bitsGE(VT));
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), VT,
+ MinAlign(SrcAlign, SrcOff), MMOFlags);
+ OutChains.push_back(Value.getValue(1));
+ Store = DAG.getTruncStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align,
+ bool isVol, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ // Turn a memmove of undef to nop.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memmove to a series of load and store ops if the size operand falls
+ // below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align), SrcAlign,
+ false, false, false, false,
+ DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(),
+ DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ MachineMemOperand::Flags MMOFlags =
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
+ uint64_t SrcOff = 0, DstOff = 0;
+ SmallVector<SDValue, 8> LoadValues;
+ SmallVector<SDValue, 8> LoadChains;
+ SmallVector<SDValue, 8> OutChains;
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Value;
+
+ Value =
+ DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
+ SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags);
+ LoadValues.push_back(Value);
+ LoadChains.push_back(Value.getValue(1));
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ OutChains.clear();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ SDValue Store;
+
+ Store = DAG.getStore(Chain, dl, LoadValues[i],
+ DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+/// \brief Lower the call to 'memset' intrinsic function into a series of store
+/// operations.
+///
+/// \param DAG Selection DAG where lowered code is placed.
+/// \param dl Link to corresponding IR location.
+/// \param Chain Control flow dependency.
+/// \param Dst Pointer to destination memory location.
+/// \param Src Value of byte to write into the memory.
+/// \param Size Number of bytes to write.
+/// \param Align Alignment of the destination in bytes.
+/// \param isVol True if destination is volatile.
+/// \param DstPtrInfo IR information on the memory pointer.
+/// \returns New head in the control flow, if lowering was successful, empty
+/// SDValue otherwise.
+///
+/// The function tries to replace 'llvm.memset' intrinsic with several store
+/// operations and value calculation code. This is usually profitable for small
+/// memory size.
+static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size, unsigned Align, bool isVol,
+ MachinePointerInfo DstPtrInfo) {
+ // Turn a memset of undef to nop.
+ if (Src.isUndef())
+ return Chain;
+
+ // Expand memset to a series of store ops if the size operand
+ // falls below a certain threshold.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::vector<EVT> MemOps;
+ bool DstAlignCanChange = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool IsZeroVal =
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ Size, (DstAlignCanChange ? 0 : Align), 0,
+ true, IsZeroVal, false, true,
+ DstPtrInfo.getAddrSpace(), ~0u,
+ DAG, TLI))
+ return SDValue();
+
+ if (DstAlignCanChange) {
+ Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
+ SmallVector<SDValue, 8> OutChains;
+ uint64_t DstOff = 0;
+ unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ EVT VT = MemOps[i];
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ if (VTSize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(i == NumMemOps-1 && i != 0);
+ DstOff -= VTSize - Size;
+ }
+
+ // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
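+ // For example, a 64-bit splat of the fill byte truncated to i32 yields the
+ // same 32-bit pattern without recomputing it from the original value.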
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
+ SDValue Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Align,
+ isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
+ OutChains.push_back(Store);
+ DstOff += VT.getSizeInBits() / 8;
+ Size -= VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
+ // pointer operands can be losslessly bitcasted to pointers of address space 0
+ if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
+SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool AlwaysInline, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),Align,
+ isVol, false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemcpy(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline,
+ DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ true, DstPtrInfo, SrcPtrInfo);
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
+ // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
+ // memcpy is not guaranteed to be safe. libc memcpys aren't required to
+ // respect volatile, so they may do things like read or write memory
+ // beyond the given memory regions. But fixing this isn't easy, and most
+ // people don't care.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memmove to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memmove with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Align, isVol,
+ false, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemmove(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
+ // FIXME: If the memmove is volatile, lowering it to plain libc memmove may
+ // not be safe. See memcpy above for more details.
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
+ SDValue Src, SDValue Size, unsigned Align,
+ bool isVol, bool isTailCall,
+ MachinePointerInfo DstPtrInfo) {
+ assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDValue Result =
+ getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
+ Align, isVol, DstPtrInfo);
+
+ if (Result.getNode())
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ if (TSI) {
+ SDValue Result = TSI->EmitTargetCodeForMemset(
+ *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo);
+ if (Result.getNode())
+ return Result;
+ }
+
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
+ // Emit a library call.
+ Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Src;
+ Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+
+ // FIXME: pass in SDLoc
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDVTList VTList, ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
+ SynchronizationScope SynchScope) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO, SuccessOrdering,
+ FailureOrdering, SynchScope);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDVTList VTList, ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomicCmpSwap(
+ unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment, AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+ MachineMemOperand::MOStore;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+
+ return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
+ SuccessOrdering, FailureOrdering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
+ EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp,
+ MachineMemOperand *MMO,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
+ SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
+ SuccessOrdering, FailureOrdering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Val,
+ const Value *PtrVal, unsigned Alignment,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ // An atomic store does not load. An atomic load does not store.
+ // (An atomicrmw obviously both loads and stores.)
+ // For now, atomics are considered to be volatile always, and they are
+ // chained as such.
+ // FIXME: Volatile isn't really correct; we should keep track of atomic
+ // orderings in the memoperand.
+ auto Flags = MachineMemOperand::MOVolatile;
+ if (Opcode != ISD::ATOMIC_STORE)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Opcode != ISD::ATOMIC_LOAD)
+ Flags |= MachineMemOperand::MOStore;
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
+ MemVT.getStoreSize(), Alignment);
+
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
+ Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ SDValue Chain, SDValue Ptr, SDValue Val,
+ MachineMemOperand *MMO, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
+ Opcode == ISD::ATOMIC_LOAD_SUB ||
+ Opcode == ISD::ATOMIC_LOAD_AND ||
+ Opcode == ISD::ATOMIC_LOAD_OR ||
+ Opcode == ISD::ATOMIC_LOAD_XOR ||
+ Opcode == ISD::ATOMIC_LOAD_NAND ||
+ Opcode == ISD::ATOMIC_LOAD_MIN ||
+ Opcode == ISD::ATOMIC_LOAD_MAX ||
+ Opcode == ISD::ATOMIC_LOAD_UMIN ||
+ Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_SWAP ||
+ Opcode == ISD::ATOMIC_STORE) &&
+ "Invalid Atomic Op");
+
+ EVT VT = Val.getValueType();
+
+ SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
+ getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Val};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachineMemOperand *MMO, AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
+}
+
+/// getMergeValues - Create a MERGE_VALUES node from the given operands.
+SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
+ if (Ops.size() == 1)
+ return Ops[0];
+
+ SmallVector<EVT, 4> VTs;
+ VTs.reserve(Ops.size());
+ for (unsigned i = 0; i < Ops.size(); ++i)
+ VTs.push_back(Ops[i].getValueType());
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
+}
+
+SDValue SelectionDAG::getMemIntrinsicNode(
+ unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
+ EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
+ bool ReadMem, bool WriteMem, unsigned Size) {
+ if (Align == 0) // Ensure that codegen never sees alignment 0
+ Align = getEVTAlignment(MemVT);
+
+ MachineFunction &MF = getMachineFunction();
+ auto Flags = MachineMemOperand::MONone;
+ if (WriteMem)
+ Flags |= MachineMemOperand::MOStore;
+ if (ReadMem)
+ Flags |= MachineMemOperand::MOLoad;
+ if (Vol)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (!Size)
+ Size = MemVT.getStoreSize();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
+
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
+}
+
+SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
+ SDVTList VTList,
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO) {
+ assert((Opcode == ISD::INTRINSIC_VOID ||
+ Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
+ Opcode == ISD::LIFETIME_START ||
+ Opcode == ISD::LIFETIME_END ||
+ (Opcode <= INT_MAX &&
+ (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
+ "Opcode is not a memory-accessing opcode!");
+
+ // Memoize the node unless it returns a flag.
+ MemIntrinsicSDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
+ VTList, MemVT, MMO);
+ createOperands(N, Ops);
+ }
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI,
+ Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+ SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.isUndef())
+ return InferPointerInfo(DAG, Ptr);
+ return MachinePointerInfo();
+}
+
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(VT);
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(*this, Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
+}
+
+SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Offset, EVT MemVT,
+ MachineMemOperand *MMO) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ?
+ getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<LoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ VT, MMO);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo,
+ MemVT, Alignment, MMOFlags, AAInfo);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+ MemVT, MMO);
+}
+
+SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant flag.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlignment(), MMOFlags);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(Val.getValueType());
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(*this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+ return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT, unsigned Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = getEVTAlignment(SVT);
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(*this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
+ return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, EVT SVT,
+ MachineMemOperand *MMO) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ if (VT == SVT)
+ return getStore(Chain, dl, Val, Ptr, MMO);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() &&
+ "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<StoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
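+
+// Editor's illustrative sketch, not part of the imported source: store only
+// the low 8 bits of an i32 value through the truncating-store overload above.
+// The asserts in the MMO-based overload require the stored type to be
+// strictly narrower than the value type (if they are equal, a plain store is
+// emitted instead).
+static SDValue emitStoreLowByte(SelectionDAG &DAG, const SDLoc &DL,
+                                SDValue Chain, SDValue Val32, SDValue Ptr) {
+  assert(Val32.getValueType() == MVT::i32 && "sketch expects an i32 value");
+  return DAG.getTruncStore(Chain, DL, Val32, Ptr, MachinePointerInfo(),
+                           MVT::i8, /*Alignment=*/0,
+                           MachineMemOperand::MONone, AAMDNodes());
+}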
+
+SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ST->isTruncatingStore(), ST->getMemoryVT(),
+ ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue Src0,
+ EVT MemVT, MachineMemOperand *MMO,
+ ISD::LoadExtType ExtTy) {
+
+ SDVTList VTs = getVTList(VT, MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ExtTy, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool isTrunc) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ isTrunc, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ assert(Ops.size() == 5 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, VT, MMO);
+ createOperands(N, Ops);
+
+ assert(N->getValue().getValueType() == N->getValueType(0) &&
+ "Incompatible type of the PassThru value in MaskedGatherSDNode");
+ assert(N->getMask().getValueType().getVectorNumElements() ==
+ N->getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorNumElements() ==
+ N->getValueType(0).getVectorNumElements() &&
+ "Vector width mismatch between index and data");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ assert(Ops.size() == 5 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
+ VTs, VT, MMO);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorNumElements() ==
+ N->getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorNumElements() ==
+ N->getValue().getValueType().getVectorNumElements() &&
+ "Vector width mismatch between index and data");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue SV, unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDUse> Ops) {
+ switch (Ops.size()) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ // Copy from an SDUse array into an SDValue array for use with
+ // the regular getNode logic.
+ SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
+ return getNode(Opcode, DL, VT, NewOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) {
+ unsigned NumOps = Ops.size();
+ switch (NumOps) {
+ case 0: return getNode(Opcode, DL, VT);
+ case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
+ case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+ default: break;
+ }
+
+ switch (Opcode) {
+ default: break;
+ case ISD::CONCAT_VECTORS: {
+ // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ return V;
+ break;
+ }
+ case ISD::SELECT_CC: {
+ assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+ assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+ "LHS and RHS of condition must have same type!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "True and False arms of SelectCC must have same type!");
+ assert(Ops[2].getValueType() == VT &&
+ "select_cc node must be of same type as true and false value!");
+ break;
+ }
+ case ISD::BR_CC: {
+ assert(NumOps == 5 && "BR_CC takes 5 operands!");
+ assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+ "LHS/RHS of comparison should match types!");
+ break;
+ }
+ }
+
+ // Memoize nodes.
+ SDNode *N;
+ SDVTList VTs = getVTList(VT);
+
+ if (VT != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
+
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+ }
+
+ InsertNode(N);
+ return SDValue(N, 0);
+}
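+
+// Editor's illustrative sketch, not part of the imported source: the ArrayRef
+// form above dispatches small operand counts to the fixed-arity overloads and
+// memoizes anything larger through the folding set. Building a splat
+// BUILD_VECTOR is a typical caller.
+static SDValue buildSplat4xi32(SelectionDAG &DAG, const SDLoc &DL,
+                               uint64_t Imm) {
+  SDValue C = DAG.getConstant(Imm, DL, MVT::i32);
+  SDValue Ops[] = { C, C, C, C };
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Ops);
+}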
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
+ return getNode(Opcode, DL, getVTList(ResultTys), Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ if (VTList.NumVTs == 1)
+ return getNode(Opcode, DL, VTList.VTs[0], Ops);
+
+#if 0
+ switch (Opcode) {
+ // FIXME: figure out how to safely handle things like
+ // int foo(int x) { return 1 << (x & 255); }
+ // int bar() { return foo(256); }
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ case ISD::SHL_PARTS:
+ if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ else if (N3.getOpcode() == ISD::AND)
+ if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+ // If the and is only masking out bits that cannot affect the shift,
+ // eliminate the and.
+ unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+ return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+ }
+ break;
+ }
+#endif
+
+ // Memoize the node unless it returns a flag.
+ SDNode *N;
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ } else {
+ N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList);
+ createOperands(N, Ops);
+ }
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTList) {
+ return getNode(Opcode, DL, VTList, None);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1) {
+ SDValue Ops[] = { N1 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2) {
+ SDValue Ops[] = { N1, N2 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDValue Ops[] = { N1, N2, N3 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4) {
+ SDValue Ops[] = { N1, N2, N3, N4 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+ SDValue N5) {
+ SDValue Ops[] = { N1, N2, N3, N4, N5 };
+ return getNode(Opcode, DL, VTList, Ops);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT) {
+ return makeVTList(SDNode::getValueTypeList(VT), 1);
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(2U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(2);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(3U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(3);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(4U);
+ ID.AddInteger(VT1.getRawBits());
+ ID.AddInteger(VT2.getRawBits());
+ ID.AddInteger(VT3.getRawBits());
+ ID.AddInteger(VT4.getRawBits());
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(4);
+ Array[0] = VT1;
+ Array[1] = VT2;
+ Array[2] = VT3;
+ Array[3] = VT4;
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
+
+SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
+ unsigned NumVTs = VTs.size();
+ FoldingSetNodeID ID;
+ ID.AddInteger(NumVTs);
+ for (unsigned index = 0; index < NumVTs; index++) {
+ ID.AddInteger(VTs[index].getRawBits());
+ }
+
+ void *IP = nullptr;
+ SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
+ if (!Result) {
+ EVT *Array = Allocator.Allocate<EVT>(NumVTs);
+ std::copy(VTs.begin(), VTs.end(), Array);
+ Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
+ VTListMap.InsertNode(Result, IP);
+ }
+ return Result->getSDVTList();
+}
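+
+// Editor's illustrative sketch, not part of the imported source: value-type
+// lists are interned through VTListMap, so requesting the same combination
+// twice returns the same backing array.
+static void checkVTListUniquing(SelectionDAG &DAG) {
+  SDVTList A = DAG.getVTList(MVT::i32, MVT::Other);
+  SDVTList B = DAG.getVTList(MVT::i32, MVT::Other);
+  assert(A.VTs == B.VTs && A.NumVTs == B.NumVTs &&
+         "identical VT lists share interned storage");
+  (void)A;
+  (void)B;
+}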
+
+
+/// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+/// specified operands. If the resultant node already exists in the DAG,
+/// this does not modify the specified node, instead it returns the node that
+/// already exists. If the resultant node does not exist in the DAG, the
+/// input node is returned. As a degenerate case, if you specify the same
+/// input operands as the node already has, the input node is returned.
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
+ assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op == N->getOperand(0)) return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ N->OperandList[0].set(Op);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
+ assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
+
+ // Check to see if there is no change.
+ if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
+ return N; // No operands changed, just return the input node.
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ if (N->OperandList[0] != Op1)
+ N->OperandList[0].set(Op1);
+ if (N->OperandList[1] != Op2)
+ N->OperandList[1].set(Op2);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+ SDValue Op3, SDValue Op4, SDValue Op5) {
+ SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
+ return UpdateNodeOperands(N, Ops);
+}
+
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
+ unsigned NumOps = Ops.size();
+ assert(N->getNumOperands() == NumOps &&
+ "Update with wrong number of operands");
+
+ // If no operands changed just return the input node.
+ if (std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
+
+ // See if the modified node already exists.
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
+ return Existing;
+
+ // Nope it doesn't. Remove the node from its current place in the maps.
+ if (InsertPos)
+ if (!RemoveNodeFromCSEMaps(N))
+ InsertPos = nullptr;
+
+ // Now we update the operands.
+ for (unsigned i = 0; i != NumOps; ++i)
+ if (N->OperandList[i] != Ops[i])
+ N->OperandList[i].set(Ops[i]);
+
+ // If this gets put into a CSE map, add it.
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos);
+ return N;
+}
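+
+// Editor's illustrative sketch, not part of the imported source:
+// UpdateNodeOperands mutates in place but may hand back a different,
+// pre-existing node when the rewritten form is already in the CSE maps, so
+// callers must use the returned pointer rather than N.
+static SDNode *commuteBinaryNode(SelectionDAG &DAG, SDNode *N) {
+  assert(N->getNumOperands() == 2 && "sketch expects a binary node");
+  return DAG.UpdateNodeOperands(N, N->getOperand(1), N->getOperand(0));
+}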
+
+/// DropOperands - Release the operands and set this node to have
+/// zero operands.
+void SDNode::DropOperands() {
+ // Unlike the code in MorphNodeTo that does this, we don't need to
+ // watch for dead nodes here.
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ Use.set(SDValue());
+ }
+}
+
+/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a
+/// machine opcode.
+///
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, None);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return SelectNodeTo(N, MachineOpc, VTs, None);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
+}
+
+SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
+ // Reset the NodeID to -1.
+ New->setNodeId(-1);
+ if (New != N) {
+ ReplaceAllUsesWith(N, New);
+ RemoveDeadNode(N);
+ }
+ return New;
+}
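+
+// Editor's illustrative sketch, not part of the imported source, purely to
+// show the call shape of SelectNodeTo during instruction selection: mutate a
+// selected node into a generic IMPLICIT_DEF of the same type. Real selectors
+// pass a target-specific machine opcode here.
+static SDNode *selectToImplicitDef(SelectionDAG &DAG, SDNode *N) {
+  return DAG.SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
+}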
+
+/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This makes the debugger work better at -O0, where there is a higher
+/// probability of other instructions being associated with that line.
+///
+/// For IROrder, we keep the smaller of the two.
+SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
+ N->setDebugLoc(DebugLoc());
+ }
+ unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
+ N->setIROrder(Order);
+ return N;
+}
+
+/// MorphNodeTo - This *mutates* the specified node to have the specified
+/// return type, opcode, and operands.
+///
+/// Note that MorphNodeTo returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one. Note that the SDLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG.
+/// As a consequence it isn't appropriate to use from within the DAG combiner or
+/// the legalizer which maintain worklists that would need to be updated when
+/// deleting things.
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ // If an identical node already exists, use it.
+ void *IP = nullptr;
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opc, VTs, Ops);
+ if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
+ return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));
+ }
+
+ if (!RemoveNodeFromCSEMaps(N))
+ IP = nullptr;
+
+ // Start the morphing.
+ N->NodeType = Opc;
+ N->ValueList = VTs.VTs;
+ N->NumValues = VTs.NumVTs;
+
+ // Clear the operands list, updating used nodes to remove this from their
+ // use list. Keep track of any operands that become dead as a result.
+ SmallPtrSet<SDNode*, 16> DeadNodeSet;
+ for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+ SDUse &Use = *I++;
+ SDNode *Used = Use.getNode();
+ Use.set(SDValue());
+ if (Used->use_empty())
+ DeadNodeSet.insert(Used);
+ }
+
+ // For MachineNode, initialize the memory references information.
+ if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N))
+ MN->setMemRefs(nullptr, nullptr);
+
+ // Swap for an appropriately sized array from the recycler.
+ removeOperands(N);
+ createOperands(N, Ops);
+
+ // Delete any nodes that are still dead after adding the uses for the
+ // new operands.
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SDNode *N : DeadNodeSet)
+ if (N->use_empty())
+ DeadNodes.push_back(N);
+ RemoveDeadNodes(DeadNodes);
+ }
+
+ if (IP)
+ CSEMap.InsertNode(N, IP); // Memoize the new node.
+ return N;
+}
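+
+// Editor's illustrative sketch, not part of the imported source: morph an ADD
+// into a SUB with the same operands. Per the comment above, the caller must
+// be prepared for MorphNodeTo to return an already-existing equivalent node
+// rather than N itself, and must not use this from worklist-driven passes.
+static SDNode *morphAddToSub(SelectionDAG &DAG, SDNode *N) {
+  assert(N->getOpcode() == ISD::ADD && "sketch expects an ADD node");
+  SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
+  return DAG.MorphNodeTo(N, ISD::SUB, DAG.getVTList(N->getValueType(0)), Ops);
+}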
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, None);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT, ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, None);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, SDValue Op1,
+ SDValue Op2, SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ SDValue Ops[] = { Op1, Op2, Op3 };
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ EVT VT1, EVT VT2, EVT VT3, EVT VT4,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
+ ArrayRef<EVT> ResultTys,
+ ArrayRef<SDValue> Ops) {
+ SDVTList VTs = getVTList(ResultTys);
+ return getMachineNode(Opcode, dl, VTs, Ops);
+}
+
+MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
+ SDVTList VTs,
+ ArrayRef<SDValue> Ops) {
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
+ MachineSDNode *N;
+ void *IP = nullptr;
+
+ if (DoCSE) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ~Opcode, VTs, Ops);
+ IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));
+ }
+ }
+
+ // Allocate a new MachineSDNode.
+ N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
+ createOperands(N, Ops);
+
+ if (DoCSE)
+ CSEMap.InsertNode(N, IP);
+
+ InsertNode(N);
+ return N;
+}
+
+/// getTargetExtractSubreg - A convenience function for creating
+/// TargetOpcode::EXTRACT_SUBREG nodes.
+SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
+ SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Operand, SRIdxVal);
+ return SDValue(Subreg, 0);
+}
+
+/// getTargetInsertSubreg - A convenience function for creating
+/// TargetOpcode::INSERT_SUBREG nodes.
+SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
+ SDValue Operand, SDValue Subreg) {
+ SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32);
+ SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ VT, Operand, Subreg, SRIdxVal);
+ return SDValue(Result, 0);
+}
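+
+// Editor's illustrative sketch, not part of the imported source: the
+// convenience wrappers above hide the EXTRACT_SUBREG/INSERT_SUBREG
+// machine-node plumbing. 'SubIdx' is a hypothetical target-specific
+// subregister index supplied by the caller.
+static SDValue extractSubRegValue(SelectionDAG &DAG, const SDLoc &DL,
+                                  EVT SubVT, SDValue Reg, int SubIdx) {
+  return DAG.getTargetExtractSubreg(SubIdx, DL, SubVT, Reg);
+}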
+
+/// getNodeIfExists - Get the specified node if it's already available, or
+/// else return NULL.
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops,
+ const SDNodeFlags *Flags) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) {
+ if (Flags)
+ E->intersectFlagsWith(Flags);
+ return E;
+ }
+ }
+ return nullptr;
+}
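+
+// Editor's illustrative sketch, not part of the imported source: query the
+// CSE maps for an existing (add X, Y) node without creating one when it is
+// missing.
+static SDNode *findExistingAdd(SelectionDAG &DAG, SDValue X, SDValue Y) {
+  SDValue Ops[] = { X, Y };
+  return DAG.getNodeIfExists(ISD::ADD, DAG.getVTList(X.getValueType()), Ops,
+                             /*Flags=*/nullptr);
+}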
+
+/// getDbgValue - Creates a SDDbgValue node.
+///
+/// SDNode
+SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
+ unsigned R, bool IsIndirect, uint64_t Off,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc())
+ SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
+}
+
+/// Constant
+SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
+ const Value *C, uint64_t Off,
+ const DebugLoc &DL, unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
+}
+
+/// FrameIndex
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
+ unsigned FI, uint64_t Off,
+ const DebugLoc &DL,
+ unsigned O) {
+ assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
+}
+
+namespace {
+
+/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
+/// pointed to by a use iterator is deleted, increment the use iterator
+/// so that it doesn't dangle.
+///
+class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SDNode::use_iterator &UI;
+ SDNode::use_iterator &UE;
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ // Increment the iterator as needed.
+ while (UI != UE && N == *UI)
+ ++UI;
+ }
+
+public:
+ RAUWUpdateListener(SelectionDAG &d,
+ SDNode::use_iterator &ui,
+ SDNode::use_iterator &ue)
+ : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
+};
+
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes From has a single result value.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
+ SDNode *From = FromN.getNode();
+ assert(From->getNumValues() == 1 && FromN.getResNo() == 0 &&
+ "Cannot replace with this method!");
+ assert(From != To.getNode() && "Cannot replace uses of a value with itself");
+
+ // Iterate over all the existing uses of From. New uses will be added
+ // to the beginning of the use list, which we avoid visiting.
+ // This specifically avoids visiting uses of From that arise while the
+ // replacement is happening, because any such uses would be the result
+ // of CSE: If an existing node looks like From after one of its operands
+ // is replaced by To, we don't want to replace all of its users with To
+ // too. See PR3018 for more info.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // Preserve Debug Values
+ TransferDbgValues(FromN, To);
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
+}
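+
+// Editor's illustrative sketch, not part of the imported source: a
+// combiner-style fold that redirects every user of (add X, 0) to X. Root
+// updating is handled by ReplaceAllUsesWith itself, as is transferring any
+// attached debug values.
+static void foldAddOfZero(SelectionDAG &DAG, SDNode *N) {
+  if (N->getOpcode() == ISD::ADD && isNullConstant(N->getOperand(1)))
+    DAG.ReplaceAllUsesWith(SDValue(N, 0), N->getOperand(0));
+}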
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version assumes that for each value of From, there is a
+/// corresponding value in To in the same position with the same type.
+///
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ assert((!From->hasAnyUseOfValue(i) ||
+ From->getValueType(i) == To->getValueType(i)) &&
+ "Cannot use this version of ReplaceAllUsesWith!");
+#endif
+
+ // Handle the trivial case.
+ if (From == To)
+ return;
+
+ // Preserve Debug Info. Only do this if there's a use.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ if (From->hasAnyUseOfValue(i)) {
+ assert((i < To->getNumValues()) && "Invalid To location");
+ TransferDbgValues(SDValue(From, i), SDValue(To, i));
+ }
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ ++UI;
+ Use.setNode(To);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
+}
+
+/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+/// This can cause recursive merging of nodes in the DAG.
+///
+/// This version can replace From with any result values. To must match the
+/// number and types of values returned by From.
+void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
+ if (From->getNumValues() == 1) // Handle the simple case efficiently.
+ return ReplaceAllUsesWith(SDValue(From, 0), To[0]);
+
+ // Preserve Debug Info.
+ for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
+ TransferDbgValues(SDValue(From, i), To[i]);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+ const SDValue &ToOp = To[Use.getResNo()];
+ ++UI;
+ Use.set(ToOp);
+ } while (UI != UE && *UI == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
+}
+
+/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The Deleted
+/// vector is handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
+ // Handle the really simple, really trivial case efficiently.
+ if (From == To) return;
+
+ // Handle the simple, trivial case efficiently.
+ if (From.getNode()->getNumValues() == 1) {
+ ReplaceAllUsesWith(From, To);
+ return;
+ }
+
+ // Preserve Debug Info.
+ TransferDbgValues(From, To);
+
+ // Iterate over just the existing users of From. See the comments in
+ // the ReplaceAllUsesWith above.
+ SDNode::use_iterator UI = From.getNode()->use_begin(),
+ UE = From.getNode()->use_end();
+ RAUWUpdateListener Listener(*this, UI, UE);
+ while (UI != UE) {
+ SDNode *User = *UI;
+ bool UserRemovedFromCSEMaps = false;
+
+ // A user can appear in a use list multiple times, and when this
+ // happens the uses are usually next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ SDUse &Use = UI.getUse();
+
+ // Skip uses of different values from the same node.
+ if (Use.getResNo() != From.getResNo()) {
+ ++UI;
+ continue;
+ }
+
+ // If this node hasn't been modified yet, it's still in the CSE maps,
+ // so remove its old self from the CSE maps.
+ if (!UserRemovedFromCSEMaps) {
+ RemoveNodeFromCSEMaps(User);
+ UserRemovedFromCSEMaps = true;
+ }
+
+ ++UI;
+ Use.set(To);
+ } while (UI != UE && *UI == User);
+
+ // We are iterating over all uses of the From node, so if a use
+ // doesn't use the specific value, no changes are made.
+ if (!UserRemovedFromCSEMaps)
+ continue;
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
+}
+
+namespace {
+ /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+ /// to record information about a use.
+ struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+ };
+
+ /// operator< - Sort Memos by User.
+ bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+ }
+}
+
+/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
+/// uses of other values produced by From.getNode() alone. The same value
+/// may appear in both the From and To list. The Deleted vector is
+/// handled the same way as for ReplaceAllUsesWith.
+void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
+ const SDValue *To,
+ unsigned Num){
+ // Handle the simple, trivial case efficiently.
+ if (Num == 1)
+ return ReplaceAllUsesOfValueWith(*From, *To);
+
+ TransferDbgValues(*From, *To);
+
+ // Read up all the uses and make records of them. This helps
+ // processing new uses that are introduced during the
+ // replacement process.
+ SmallVector<UseMemo, 4> Uses;
+ for (unsigned i = 0; i != Num; ++i) {
+ unsigned FromResNo = From[i].getResNo();
+ SDNode *FromNode = From[i].getNode();
+ for (SDNode::use_iterator UI = FromNode->use_begin(),
+ E = FromNode->use_end(); UI != E; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == FromResNo) {
+ UseMemo Memo = { *UI, i, &Use };
+ Uses.push_back(Memo);
+ }
+ }
+ }
+
+ // Sort the uses, so that all the uses from a given User are together.
+ std::sort(Uses.begin(), Uses.end());
+
+ for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
+ UseIndex != UseIndexEnd; ) {
+ // We know that this user uses some value of From. If it is the right
+ // value, update it.
+ SDNode *User = Uses[UseIndex].User;
+
+ // This node is about to morph, remove its old self from the CSE maps.
+ RemoveNodeFromCSEMaps(User);
+
+ // The Uses array is sorted, so all the uses for a given User
+ // are next to each other in the list.
+ // To help reduce the number of CSE recomputations, process all
+ // the uses of this user that we can find this way.
+ do {
+ unsigned i = Uses[UseIndex].Index;
+ SDUse &Use = *Uses[UseIndex].Use;
+ ++UseIndex;
+
+ Use.set(To[i]);
+ } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);
+
+ // Now that we have modified User, add it back to the CSE maps. If it
+ // already exists there, recursively merge the results together.
+ AddModifiedNodeToCSEMaps(User);
+ }
+}
+
+/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
+/// based on their topological order. It returns the number of nodes in the
+/// DAG, which is one more than the largest id assigned.
+unsigned SelectionDAG::AssignTopologicalOrder() {
+
+ unsigned DAGSize = 0;
+
+ // SortedPos tracks the progress of the algorithm. Nodes before it are
+ // sorted, nodes after it are unsorted. When the algorithm completes
+ // it is at the end of the list.
+ allnodes_iterator SortedPos = allnodes_begin();
+
+ // Visit all the nodes. Move nodes with no operands to the front of
+ // the list immediately. Annotate nodes that do have operands with their
+ // operand count. Before we do this, the Node Id fields of the nodes
+ // may contain arbitrary values. After, the Node Id fields for nodes
+ // before SortedPos will contain the topological sort index, and the
+ // Node Id fields for nodes at SortedPos and after will contain the
+ // count of outstanding operands.
+ for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
+ SDNode *N = &*I++;
+ checkForCycles(N, this);
+ unsigned Degree = N->getNumOperands();
+ if (Degree == 0) {
+ // A node with no operands: move it into sorted position immediately.
+ N->setNodeId(DAGSize++);
+ allnodes_iterator Q(N);
+ if (Q != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Temporarily use the Node Id as scratch space for the degree count.
+ N->setNodeId(Degree);
+ }
+ }
+
+ // Visit all the nodes. As we iterate, move nodes into sorted order,
+ // such that by the time the end is reached all nodes will be sorted.
+ for (SDNode &Node : allnodes()) {
+ SDNode *N = &Node;
+ checkForCycles(N, this);
+ // N is in sorted position, so each of its users now has one fewer
+ // operand that still needs to be sorted.
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *P = *UI;
+ unsigned Degree = P->getNodeId();
+ assert(Degree != 0 && "Invalid node degree");
+ --Degree;
+ if (Degree == 0) {
+ // All of P's operands are sorted, so P may be sorted now.
+ P->setNodeId(DAGSize++);
+ if (P->getIterator() != SortedPos)
+ SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
+ assert(SortedPos != AllNodes.end() && "Overran node list");
+ ++SortedPos;
+ } else {
+ // Update P's outstanding operand count.
+ P->setNodeId(Degree);
+ }
+ }
+ if (Node.getIterator() == SortedPos) {
+#ifndef NDEBUG
+ allnodes_iterator I(N);
+ SDNode *S = &*++I;
+ dbgs() << "Overran sorted position:\n";
+ S->dumprFull(this); dbgs() << "\n";
+ dbgs() << "Checking if this is due to cycles\n";
+ checkForCycles(this, true);
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ assert(SortedPos == AllNodes.end() &&
+ "Topological sort incomplete!");
+ assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
+ "First node in topological sort is not the entry token!");
+ assert(AllNodes.front().getNodeId() == 0 &&
+ "First node in topological sort has non-zero id!");
+ assert(AllNodes.front().getNumOperands() == 0 &&
+ "First node in topological sort has operands!");
+ assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
+ "Last node in topological sort has unexpected id!");
+ assert(AllNodes.back().use_empty() &&
+ "Last node in topological sort has users!");
+ assert(DAGSize == allnodes_size() && "Node count mismatch!");
+ return DAGSize;
+}
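+
+// Editor's illustrative sketch, not part of the imported source: after the
+// call above, node IDs run 0..N-1 in allnodes() order, so a forward walk over
+// allnodes() visits every node after all of its operands.
+static void visitNodesTopologically(SelectionDAG &DAG) {
+  unsigned DAGSize = DAG.AssignTopologicalOrder();
+  (void)DAGSize;
+  for (SDNode &N : DAG.allnodes())
+    assert(N.getNodeId() >= 0 && (unsigned)N.getNodeId() < DAGSize &&
+           "IDs are assigned densely in topological order");
+}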
+
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
+ if (SD) {
+ assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
+ SD->setHasDebugValue(true);
+ }
+ DbgInfo->add(DB, SD, isParameter);
+}
+
+/// TransferDbgValues - Transfer SDDbgValues. Called when replacing nodes.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ // Only add DbgValues attached to the same ResNo.
+ if (Dbg->getKind() == SDDbgValue::SDNODE &&
+ Dbg->getResNo() == From.getResNo()) {
+ SDDbgValue *Clone =
+ getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
+ To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
+ Dbg->getDebugLoc(), Dbg->getOrder());
+ AddDbgValue(Clone, ToNode, false);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SDNode Class
+//===----------------------------------------------------------------------===//
+
+bool llvm::isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+bool llvm::isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+bool llvm::isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+bool llvm::isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
+bool llvm::isBitwiseNot(SDValue V) {
+ return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
+}
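+
+// Editor's illustrative sketch, not part of the imported source: the constant
+// predicates above make simple pattern checks concise, e.g. recognizing an
+// integer negation written as (sub 0, X).
+static bool isNegationOf(SDValue V, SDValue X) {
+  return V.getOpcode() == ISD::SUB && isNullConstant(V.getOperand(0)) &&
+         V.getOperand(1) == X;
+}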
+
+HandleSDNode::~HandleSDNode() {
+ DropOperands();
+}
+
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
+ const DebugLoc &DL,
+ const GlobalValue *GA, EVT VT,
+ int64_t o, unsigned char TF)
+ : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ TheGlobal = GA;
+}
+
+AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
+ EVT VT, unsigned SrcAS,
+ unsigned DestAS)
+ : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)),
+ SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {}
+
+MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
+ : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(), MMO->isInvariant());
+ assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
+ // We check here that the size of the memory operand fits within the size of
+ // the MMO. This is because the MMO might indicate only a possible address
+ // range instead of specifying the affected memory addresses precisely.
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
+}
+
+/// Profile - Gather unique data for the node.
+///
+void SDNode::Profile(FoldingSetNodeID &ID) const {
+ AddNodeIDNode(ID, this);
+}
+
+namespace {
+ struct EVTArray {
+ std::vector<EVT> VTs;
+
+ EVTArray() {
+ VTs.reserve(MVT::LAST_VALUETYPE);
+ for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
+ VTs.push_back(MVT((MVT::SimpleValueType)i));
+ }
+ };
+}
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+static ManagedStatic<EVTArray> SimpleVTArray;
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
+/// getValueTypeList - Return a pointer to the specified value type.
+///
+const EVT *SDNode::getValueTypeList(EVT VT) {
+ if (VT.isExtended()) {
+ sys::SmartScopedLock<true> Lock(*VTMutex);
+ return &(*EVTs->insert(VT).first);
+ } else {
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
+ return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
+ }
+}
+
+/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+/// indicated value. This method ignores uses of other values defined by this
+/// operation.
+bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ // TODO: Only iterate over uses of a given value of the node
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ if (UI.getUse().getResNo() == Value) {
+ if (NUses == 0)
+ return false;
+ --NUses;
+ }
+ }
+
+ // Found exactly the right number of uses?
+ return NUses == 0;
+}
+
+
+/// hasAnyUseOfValue - Return true if there is any use of the indicated
+/// value. This method ignores uses of other values defined by this operation.
+bool SDNode::hasAnyUseOfValue(unsigned Value) const {
+ assert(Value < getNumValues() && "Bad value!");
+
+ for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI)
+ if (UI.getUse().getResNo() == Value)
+ return true;
+
+ return false;
+}
+
+
+/// isOnlyUserOf - Return true if this node is the only use of N.
+///
+bool SDNode::isOnlyUserOf(const SDNode *N) const {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (User == this)
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
+/// isOperandOf - Return true if this value is an operand of N.
+///
+bool SDValue::isOperandOf(const SDNode *N) const {
+ for (const SDValue &Op : N->op_values())
+ if (*this == Op)
+ return true;
+ return false;
+}
+
+bool SDNode::isOperandOf(const SDNode *N) const {
+ for (const SDValue &Op : N->op_values())
+ if (this == Op.getNode())
+ return true;
+ return false;
+}
+
+/// reachesChainWithoutSideEffects - Return true if this operand (which must
+/// be a chain) reaches the specified operand without crossing any
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in; it does not do an exhaustive search.
+bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
+ unsigned Depth) const {
+ if (*this == Dest) return true;
+
+ // Don't search too deeply, we just want to be able to see through
+ // TokenFactor's etc.
+ if (Depth == 0) return false;
+
+ // If this is a token factor, all inputs to the TF happen in parallel. If any
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
+ if (getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
+ }
+
+ // Loads don't have side effects, look through them.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
+ if (!Ld->isVolatile())
+ return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
+ }
+ return false;
+}
+
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(this);
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+ assert(Num < NumOperands && "Invalid child # of SDNode!");
+ return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+const SDNodeFlags *SDNode::getFlags() const {
+ if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+ return &FlagsNode->Flags;
+ return nullptr;
+}
+
+void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) {
+ if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
+ FlagsNode->Flags.intersectWith(Flags);
+}
+
+SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] =
+ getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+ getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout())));
+ } else {
+ // A scalar operand; just use it as is.
+ Operands[j] = Operand;
+ }
+ }
+
+ switch (N->getOpcode()) {
+ default: {
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands,
+ N->getFlags()));
+ break;
+ }
+ case ISD::VSELECT:
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
+ break;
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
+ getShiftAmountOperand(Operands[0].getValueType(),
+ Operands[1])));
+ break;
+ case ISD::SIGN_EXTEND_INREG:
+ case ISD::FP_ROUND_INREG: {
+ EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+ Operands[0],
+ getValueType(ExtVT)));
+ }
+ }
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(getUNDEF(EltVT));
+
+ return getNode(ISD::BUILD_VECTOR, dl,
+ EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars);
+}
+
+bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
+ LoadSDNode *Base,
+ unsigned Bytes,
+ int Dist) const {
+ if (LD->isVolatile() || Base->isVolatile())
+ return false;
+ if (LD->isIndexed() || Base->isIndexed())
+ return false;
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X + C.
+ if (isBaseWithConstantOffset(Loc)) {
+ int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc) {
+ // If the base location is a simple address with no offset itself, then
+ // the second load's first add operand should be the base address.
+ if (LocOffset == Dist * (int)Bytes)
+ return true;
+ } else if (isBaseWithConstantOffset(BaseLoc)) {
+ // The base location itself has an offset, so subtract that value from the
+ // second load's offset before comparing to distance * size.
+ int64_t BOffset =
+ cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue();
+ if (Loc.getOperand(0) == BaseLoc.getOperand(0)) {
+ if ((LocOffset - BOffset) == Dist * (int)Bytes)
+ return true;
+ }
+ }
+ }
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ const GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+ unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
+ getDataLayout());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
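+    // For example, if the low four bits of the address are known to be zero,
+    // AlignBits is 4 and Align is 16; MinAlign below then folds in the
+    // constant offset from the global.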
+ if (Align)
+ return MinAlign(Align, GVOffset);
+ }
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
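+  // A frame index of (1 << 31) below serves as a sentinel meaning that no
+  // frame index was found.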
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (isBaseWithConstantOffset(Ptr) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split (or expanded) into two not necessarily identical pieces.
+std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
+ // Currently all types are split in half.
+ EVT LoVT, HiVT;
+ if (!VT.isVector()) {
+ LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT);
+ } else {
+ unsigned NumElements = VT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NumElements/2);
+ }
+ return std::make_pair(LoVT, HiVT);
+}
+
+/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the
+/// low/high part.
+std::pair<SDValue, SDValue>
+SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <=
+ N.getValueType().getVectorNumElements() &&
+ "More vector elements requested than available!");
+ SDValue Lo, Hi;
+ Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
+ getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+ Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N,
+ getConstant(LoVT.getVectorNumElements(), DL,
+ TLI->getVectorIdxTy(getDataLayout())));
+ return std::make_pair(Lo, Hi);
+}
+
+void SelectionDAG::ExtractVectorElements(SDValue Op,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count) {
+ EVT VT = Op.getValueType();
+ if (Count == 0)
+ Count = VT.getVectorNumElements();
+
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxTy = TLI->getVectorIdxTy(getDataLayout());
+ SDLoc SL(Op);
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+ Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Op, getConstant(i, SL, IdxTy)));
+ }
+}
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+ return getGlobal()->getType()->getAddressSpace();
+}
+
+
+Type *ConstantPoolSDNode::getType() const {
+ if (isMachineConstantPoolEntry())
+ return Val.MachineCPVal->getType();
+ return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+ APInt &SplatUndef,
+ unsigned &SplatBitSize,
+ bool &HasAnyUndefs,
+ unsigned MinSplatBits,
+ bool isBigEndian) const {
+ EVT VT = getValueType(0);
+ assert(VT.isVector() && "Expected a vector type");
+ unsigned sz = VT.getSizeInBits();
+ if (MinSplatBits > sz)
+ return false;
+
+ SplatValue = APInt(sz, 0);
+ SplatUndef = APInt(sz, 0);
+
+ // Get the bits. Bits with undefined values (when the corresponding element
+ // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+ // in SplatValue. If any of the values are not constant, give up and return
+ // false.
+ unsigned int nOps = getNumOperands();
+ assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
+ SDValue OpVal = getOperand(i);
+ unsigned BitPos = j * EltBitSize;
+
+ if (OpVal.isUndef())
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
+ zextOrTrunc(sz) << BitPos;
+ else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
+ else
+ return false;
+ }
+
+ // The build_vector is all constants or undefs. Find the smallest element
+ // size that splats the vector.
+
+ HasAnyUndefs = (SplatUndef != 0);
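+  // For example, a <4 x i8> build_vector with every element equal to 1 packs
+  // SplatValue as 0x01010101; the halving loop below then shrinks sz from 32
+  // to 8, so the reported SplatBitSize is 8.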
+ while (sz > 8) {
+
+ unsigned HalfSize = sz / 2;
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+ // If the two halves do not match (ignoring undef bits), stop here.
+ if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+ MinSplatBits > HalfSize)
+ break;
+
+ SplatValue = HighValue | LowValue;
+ SplatUndef = HighUndef & LowUndef;
+
+ sz = HalfSize;
+ }
+
+ SplatBitSize = sz;
+ return true;
+}
+
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(getNumOperands());
+ }
+ SDValue Splatted;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ SDValue Op = getOperand(i);
+ if (Op.isUndef()) {
+ if (UndefElements)
+ (*UndefElements)[i] = true;
+ } else if (!Splatted) {
+ Splatted = Op;
+ } else if (Splatted != Op) {
+ return SDValue();
+ }
+ }
+
+ if (!Splatted) {
+ assert(getOperand(0).isUndef() &&
+ "Can only have a splat without a constant for all undefs.");
+ return getOperand(0);
+ }
+
+ return Splatted;
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
+}
+
+int32_t
+BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
+ uint32_t BitWidth) const {
+ if (ConstantFPSDNode *CN =
+ dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) {
+ bool IsExact;
+ APSInt IntVal(BitWidth);
+ const APFloat &APF = CN->getValueAPF();
+ if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) !=
+ APFloat::opOK ||
+ !IsExact)
+ return -1;
+
+ return IntVal.exactLogBase2();
+ }
+ return -1;
+}
+
+bool BuildVectorSDNode::isConstant() const {
+ for (const SDValue &Op : op_values()) {
+ unsigned Opc = Op.getOpcode();
+ if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
+ return false;
+ }
+ return true;
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i, e;
+ for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+ /* search */;
+
+ assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+ // Make sure all remaining elements are either undef or the same as the first
+ // non-undef value.
+ for (int Idx = Mask[i]; i != e; ++i)
+ if (Mask[i] >= 0 && Mask[i] != Idx)
+ return false;
+ return true;
+}
+
+// \brief Returns the SDNode if it is a constant integer BuildVector
+// or constant integer.
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ // Treat a GlobalAddress supporting constant offset folding as a
+ // constant integer.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
+ if (GA->getOpcode() == ISD::GlobalAddress &&
+ TLI->isOffsetFoldingLegal(GA))
+ return GA;
+ return nullptr;
+}
+
+#ifndef NDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+ SmallPtrSetImpl<const SDNode*> &Visited,
+ SmallPtrSetImpl<const SDNode*> &Checked,
+ const llvm::SelectionDAG *DAG) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N).second) {
+ errs() << "Detected cycle in SelectionDAG\n";
+ dbgs() << "Offending node:\n";
+ N->dumprFull(DAG); dbgs() << "\n";
+ abort();
+ }
+
+ for (const SDValue &Op : N->op_values())
+ checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG);
+
+ Checked.insert(N);
+ Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N,
+ const llvm::SelectionDAG *DAG,
+ bool force) {
+#ifndef NDEBUG
+ bool check = force;
+#ifdef EXPENSIVE_CHECKS
+ check = true;
+#endif // EXPENSIVE_CHECKS
+ if (check) {
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked, DAG);
+ }
+#endif // !NDEBUG
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+ checkForCycles(DAG->getRoot().getNode(), DAG, force);
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 000000000000..e03282cad6b8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,9127 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "isel"
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision),
+ cl::init(0));
+
+static cl::opt<bool>
+EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
+ cl::desc("Enable fast-math-flags for DAG nodes"));
+
+/// Minimum jump table density for normal functions.
+static cl::opt<unsigned>
+JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "a normal function"));
+
+/// Minimum jump table density for -Os or -Oz functions.
+static cl::opt<unsigned>
+OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden,
+ cl::desc("Minimum density for building a jump table in "
+ "an optsize function"));
+
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static const unsigned MaxParallelChains = 64;
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger than ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V,
+ Optional<ISD::NodeType> AssertOp = None) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+ PartVT, ValueVT, V);
+
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ if (NumParts > 1) {
+ // Assemble the value from multiple parts.
+ if (ValueVT.isInteger()) {
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned ValueBits = ValueVT.getSizeInBits();
+
+ // Assemble the power of 2 part.
+ unsigned RoundParts = NumParts & (NumParts - 1) ?
+ 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundBits = PartBits * RoundParts;
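+      // For example, NumParts == 3 gives RoundParts == 2: the power-of-2
+      // prefix of the parts is assembled here and the odd trailing part is
+      // merged in below.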
+ EVT RoundVT = RoundBits == ValueBits ?
+ ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
+ SDValue Lo, Hi;
+
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
+
+ if (RoundParts > 2) {
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
+ PartVT, HalfVT, V);
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
+ RoundParts / 2, PartVT, HalfVT, V);
+ } else {
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
+
+ if (RoundParts < NumParts) {
+ // Assemble the trailing non-power-of-2 part.
+ unsigned OddParts = NumParts - RoundParts;
+ EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
+ Hi = getCopyFromParts(DAG, DL,
+ Parts + RoundParts, OddParts, PartVT, OddVT, V);
+
+ // Combine the round and odd parts.
+ Lo = Val;
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+ EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi =
+ DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
+ }
+ } else if (PartVT.isFloatingPoint()) {
+ // FP split into multiple FP parts (for ppcf128)
+ assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
+ "Unexpected split");
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
+ if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
+ std::swap(Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
+ } else {
+ // FP split into integer parts (soft fp)
+ assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
+ !PartVT.isVector() && "Unexpected split");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
+ }
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ // PartEVT is the type of the register class that holds the value.
+ // ValueVT is the type of the inline asm operation.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
+ ValueVT.bitsLT(PartEVT)) {
+ // For an FP value in an integer part, we need to truncate to the right
+ // width first.
+ PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
+ }
+
+ // Handle types that have the same size.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle types with different sizes.
+ if (PartEVT.isInteger() && ValueVT.isInteger()) {
+ if (ValueVT.bitsLT(PartEVT)) {
+ // For a truncate, see if we have any information to
+ // indicate whether the truncated bits will always be
+      // zero or a sign-extension of the narrower value.
+ if (AssertOp.hasValue())
+ Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
+ DAG.getValueType(ValueVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(
+ ISD::FP_ROUND, DL, ValueVT, Val,
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
+
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
+ }
+
+ llvm_unreachable("Unknown mismatch!");
+}
+
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+ const Twine &ErrMsg) {
+ const Instruction *I = dyn_cast_or_null<Instruction>(V);
+ if (!V)
+ return Ctx.emitError(ErrMsg);
+
+ const char *AsmError = ", possible invalid constraint for vector type";
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (isa<InlineAsm>(CI->getCalledValue()))
+ return Ctx.emitError(I, ErrMsg + AsmError);
+
+ return Ctx.emitError(I, ErrMsg);
+}
+
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the vector value they represent. Unlike the scalar
+/// getCopyFromParts above, this variant takes no AssertOp hint about the
+/// contents of any extra high bits.
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue *Parts, unsigned NumParts,
+ MVT PartVT, EVT ValueVT, const Value *V) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT.getSizeInBits() ==
+ Parts[0].getSimpleValueType().getSizeInBits() &&
+ "Part type sizes don't match!");
+
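+    // For example, on a hypothetical target whose widest legal vector type is
+    // v4i16, a v8i16 value breaks down into two v4i16 intermediates, each
+    // carried in a single register part.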
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT, V);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT, V);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, ValueVT, Ops);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ EVT PartEVT = Val.getValueType();
+
+ if (PartEVT == ValueVT)
+ return Val;
+
+ if (PartEVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+ "Cannot handle this kind of promotion");
+ // Promoted vector extract
+ return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
+
+ }
+
+ // Trivial bitcast if the types are the same size and the destination
+ // vector type is legal.
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+ TLI.isTypeLegal(ValueVT))
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Handle cases such as i8 -> <1 x i1>
+ if (ValueVT.getVectorNumElements() != 1) {
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "non-trivial scalar-to-vector conversion");
+ return DAG.getUNDEF(ValueVT);
+ }
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartEVT)
+ Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V);
+
+/// getCopyToParts - Create a series of nodes that contain the specified value
+/// split into legal parts. If the parts contain more bits than Val, then, for
+/// integers, ExtendKind can be used to specify how to generate the extra bits.
+static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ const Value *V,
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+
+ unsigned PartBits = PartVT.getSizeInBits();
+ unsigned OrigNumParts = NumParts;
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Copying to an illegal type!");
+
+ if (NumParts == 0)
+ return;
+
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
+
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
+ if (ValueVT.isFloatingPoint()) {
+ // FP values need to be bitcast, then extended if they are being put
+ // into a larger container.
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ }
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartEVT != ValueVT);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+    // If the parts cover fewer bits than the value has, truncate the value.
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
+
+ if (NumParts == 1) {
+ if (PartEVT != ValueVT) {
+ diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+ "scalar-to-vector conversion failed");
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
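+    // For example, splitting a 96-bit value into three 32-bit parts gives
+    // RoundParts == 2 and OddParts == 1: the high 32 bits are shifted down
+    // and copied out as the tail, and the remaining 64 bits are bisected
+    // below with EXTRACT_ELEMENT.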
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits, DL));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
+
+ if (DAG.getDataLayout().isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
+ }
+ }
+ }
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
+
+
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ MVT PartVT, const Value *V) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (NumParts == 1) {
+ EVT PartEVT = PartVT;
+ if (PartEVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops);
+
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else if (PartVT.isVector() &&
+ PartEVT.getVectorElementType().bitsGE(
+ ValueVT.getVectorElementType()) &&
+ PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+ // Promoted vector extract
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+    } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ }
+
+ Parts[0] = Val;
+ return;
+ }
+
+ // Handle a multi-element vector.
+ EVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
+ unsigned NumElements = ValueVT.getVectorNumElements();
+
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+
+ // Split the vector into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ for (unsigned i = 0; i != NumIntermediates; ++i) {
+ if (IntermediateVT.isVector())
+ Ops[i] =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
+ DAG.getConstant(i * (NumElements / NumIntermediates), DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ else
+ Ops[i] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Split the intermediate operands into legal parts.
+ if (NumParts == NumIntermediates) {
+ // If the register was not expanded, promote or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
+ } else if (NumParts > 0) {
+    // If the intermediate type was expanded, split each intermediate value
+    // into legal parts.
+ assert(NumIntermediates != 0 && "division by zero");
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
+ }
+}
+
+RegsForValue::RegsForValue() {}
+
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+ EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty) {
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+
+ for (EVT ValueVT : ValueVTs) {
+ unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ const SDLoc &dl, SDValue &Chain,
+ SDValue *Flag, const Value *V) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (!Flag) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+ Parts[i] = P;
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector())
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo *LOI =
+ FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+ if (!LOI)
+ continue;
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI->NumSignBits;
+ unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
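+      // countLeadingOnes() on KnownZero is the number of high bits of the
+      // register that are known to be zero.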
+
+ if (NumZeroBits == RegSize) {
+ // The current value is a zero.
+ // Explicitly express that as it would be easier for
+ // optimizations to kick in.
+ Parts[i] = DAG.getConstant(0, dl, RegisterVT);
+ continue;
+ }
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize) {
+ isSExt = true; // ASSERT SEXT 1
+ FromVT = MVT::i1;
+ } else if (NumZeroBits >= RegSize - 1) {
+ isSExt = false; // ASSERT ZEXT 1
+ FromVT = MVT::i1;
+ } else if (NumSignBits > RegSize - 8) {
+ isSExt = true; // ASSERT SEXT 8
+ FromVT = MVT::i8;
+ } else if (NumZeroBits >= RegSize - 8) {
+ isSExt = false; // ASSERT ZEXT 8
+ FromVT = MVT::i8;
+ } else if (NumSignBits > RegSize - 16) {
+ isSExt = true; // ASSERT SEXT 16
+ FromVT = MVT::i16;
+ } else if (NumZeroBits >= RegSize - 16) {
+ isSExt = false; // ASSERT ZEXT 16
+ FromVT = MVT::i16;
+ } else if (NumSignBits > RegSize - 32) {
+ isSExt = true; // ASSERT SEXT 32
+ FromVT = MVT::i32;
+ } else if (NumZeroBits >= RegSize - 32) {
+ isSExt = false; // ASSERT ZEXT 32
+ FromVT = MVT::i32;
+ } else {
+ continue;
+ }
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT, V);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
+}
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
+ const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const Value *V,
+ ISD::NodeType PreferredExtendType) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ISD::NodeType ExtendKind = PreferredExtendType;
+
+  // Get the list of the value's legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ MVT RegisterVT = RegVTs[Value];
+
+ if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (!Flag) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+ // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ else if (!Regs.empty() &&
+ TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ // Put the register class of the virtual registers in the flag word. That
+ // way, later passes can recompute register class constraints for inline
+ // assembly as well as normal instructions.
+ // Don't do this for tied operands that can use the regclass information
+ // from the def.
+ const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
+
+ SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
+ Ops.push_back(Res);
+
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ MVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ unsigned TheReg = Regs[Reg++];
+ Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+
+ if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
+ // If we clobbered the stack pointer, MFI should know about it.
+ assert(DAG.getMachineFunction().getFrameInfo()->
+ hasOpaqueSPAdjustment());
+ }
+ }
+ }
+}
+
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
+ AA = &aa;
+ GFI = gfi;
+ LibInfo = li;
+ DL = &DAG.getDataLayout();
+ Context = DAG.getContext();
+ LPadToCallSiteMap.clear();
+}
+
+/// clear - Clear out the current SelectionDAG and the associated
+/// state and prepare this SelectionDAGBuilder object to be used
+/// for a new block. This doesn't clear out information about
+/// additional blocks that are needed to complete switch lowering
+/// or PHI node updating; that information is cleared out as it is
+/// consumed.
+void SelectionDAGBuilder::clear() {
+ NodeMap.clear();
+ UnusedArgNodeMap.clear();
+ PendingLoads.clear();
+ PendingExports.clear();
+ CurInst = nullptr;
+ HasTailCall = false;
+ SDNodeOrder = LowestSDNodeOrder;
+ StatepointLowering.clear();
+}
+
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from the clear so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+ DanglingDebugInfoMap.clear();
+}
+
+/// getRoot - Return the current virtual root of the Selection DAG,
+/// flushing any PendingLoad items. This must be done before emitting
+/// a store or any other node that may need to be ordered after any
+/// prior load instructions.
+///
+SDValue SelectionDAGBuilder::getRoot() {
+ if (PendingLoads.empty())
+ return DAG.getRoot();
+
+ if (PendingLoads.size() == 1) {
+ SDValue Root = PendingLoads[0];
+ DAG.setRoot(Root);
+ PendingLoads.clear();
+ return Root;
+ }
+
+ // Otherwise, we have to make a token factor node.
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ PendingLoads);
+ PendingLoads.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// getControlRoot - Similar to getRoot, but instead of flushing all the
+/// PendingLoad items, flush all the PendingExports items. It is necessary
+/// to do this before emitting a terminator instruction.
+///
+SDValue SelectionDAGBuilder::getControlRoot() {
+ SDValue Root = DAG.getRoot();
+
+ if (PendingExports.empty())
+ return Root;
+
+ // Turn all of the CopyToReg chains into one factored node.
+ if (Root.getOpcode() != ISD::EntryToken) {
+ unsigned i = 0, e = PendingExports.size();
+ for (; i != e; ++i) {
+ assert(PendingExports[i].getNode()->getNumOperands() > 1);
+ if (PendingExports[i].getNode()->getOperand(0) == Root)
+ break; // Don't add the root if we already indirectly depend on it.
+ }
+
+ if (i == e)
+ PendingExports.push_back(Root);
+ }
+
+ Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ PendingExports);
+ PendingExports.clear();
+ DAG.setRoot(Root);
+ return Root;
+}
+
+/// Copy swift error to the final virtual register at end of a basic block, as
+/// specified by SwiftErrorWorklist, if necessary.
+static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) {
+ const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo();
+ if (!TLI.supportSwiftError())
+ return;
+
+ if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB))
+ return;
+
+ // Go through entries in SwiftErrorWorklist, and create copy as necessary.
+ FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry =
+ SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB];
+ FunctionLoweringInfo::SwiftErrorVRegs &MapEntry =
+ SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB];
+ for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) {
+ unsigned WorkReg = WorklistEntry[I];
+
+ // Find the swifterror virtual register for the value in SwiftErrorMap.
+ unsigned MapReg = MapEntry[I];
+ assert(TargetRegisterInfo::isVirtualRegister(MapReg) &&
+ "Entries in SwiftErrorMap should be virtual registers");
+
+ if (WorkReg == MapReg)
+ continue;
+
+ // Create copy from SwiftErrorMap to SwiftWorklist.
+ auto &DL = SDB.DAG.getDataLayout();
+ SDValue CopyNode = SDB.DAG.getCopyToReg(
+ SDB.getRoot(), SDB.getCurSDLoc(), WorkReg,
+ SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL))));
+ MapEntry[I] = WorkReg;
+ SDB.DAG.setRoot(CopyNode);
+ }
+}
+
+void SelectionDAGBuilder::visit(const Instruction &I) {
+ // Set up outgoing PHI node register values before emitting the terminator.
+ if (isa<TerminatorInst>(&I)) {
+ copySwiftErrorsToFinalVRegs(*this);
+ HandlePHINodesInSuccessorBlocks(I.getParent());
+ }
+
+ ++SDNodeOrder;
+
+ CurInst = &I;
+
+ visit(I.getOpcode(), I);
+
+ if (!isa<TerminatorInst>(&I) && !HasTailCall &&
+ !isStatepoint(&I)) // statepoints handle their exports internally
+ CopyToExportRegsIfNeeded(&I);
+
+ CurInst = nullptr;
+}
+
+void SelectionDAGBuilder::visitPHI(const PHINode &) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
+}
+
+void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
+ // Note: this doesn't use InstVisitor, because it has to work with
+ // ConstantExpr's in addition to instructions.
+ switch (Opcode) {
+ default: llvm_unreachable("Unknown instruction type encountered!");
+ // Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
+#include "llvm/IR/Instruction.def"
+ }
+}
+
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ DILocalVariable *Variable = DI->getVariable();
+ DIExpression *Expr = DI->getExpression();
+ assert(Variable->isValidLocationForIntrinsic(dl) &&
+ "Expected inlined-at fields to agree");
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
+ Val)) {
+ SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
+ false, Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
+/// getCopyFromRegs - If there was virtual register allocated for the value V
+/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ SDValue Result;
+
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Ty);
+ SDValue Chain = DAG.getEntryNode();
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ resolveDanglingDebugInfo(V, Result);
+ }
+
+ return Result;
+}
+
+/// getValue - Return an SDValue for the given Value.
+SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
+ return copyFromReg;
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+// Return true if SDValue exists for the given Value
+bool SelectionDAGBuilder::findValue(const Value *V) const {
+ return (NodeMap.find(V) != NodeMap.end()) ||
+ (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
+}
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) {
+ if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
+ // Remove the debug location from the node as the node is about to be used
+ // in a location which may differ from the original debug location. This
+ // is relevant to Constant and ConstantFP nodes because they can appear
+ // as constant expressions inside PHI nodes.
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return DAG.getConstant(*CI, getCurSDLoc(), VT);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
+
+ if (isa<ConstantPointerNull>(C)) {
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return DAG.getConstant(0, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout(), AS));
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
+
+ if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
+ return DAG.getUNDEF(VT);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ visit(CE->getOpcode(), *CE);
+ SDValue N1 = NodeMap[V];
+ assert(N1.getNode() && "visit didn't populate the NodeMap!");
+ return N1;
+ }
+
+ if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ SmallVector<SDValue, 4> Constants;
+ for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+ OI != OE; ++OI) {
+ SDNode *Val = getValue(*OI).getNode();
+ // If the operand is an empty aggregate, there are no values.
+ if (!Val) continue;
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Constants.push_back(SDValue(Val, i));
+ }
+
+ return DAG.getMergeValues(Constants, getCurSDLoc());
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(Ops, getCurSDLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
+ VT, Ops);
+ }
+
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
+ assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
+ "Unknown struct or array constant!");
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
+ unsigned NumElts = ValueVTs.size();
+ if (NumElts == 0)
+ return SDValue(); // empty struct
+ SmallVector<SDValue, 4> Constants(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ EVT EltVT = ValueVTs[i];
+ if (isa<UndefValue>(C))
+ Constants[i] = DAG.getUNDEF(EltVT);
+ else if (EltVT.isFloatingPoint())
+ Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
+ else
+ Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
+ }
+
+ return DAG.getMergeValues(Constants, getCurSDLoc());
+ }
+
+ if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
+ return DAG.getBlockAddress(BA, VT);
+
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = VecTy->getNumElements();
+
+ // Now that we know the number and type of the elements, get that number of
+ // elements into the Ops array based on what kind of constant it is.
+ SmallVector<SDValue, 16> Ops;
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
+ for (unsigned i = 0; i != NumElements; ++i)
+ Ops.push_back(getValue(CV->getOperand(i)));
+ } else {
+ assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
+ EVT EltVT =
+ TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
+
+ SDValue Op;
+ if (EltVT.isFloatingPoint())
+ Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
+ else
+ Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
+ Ops.assign(NumElements, Op);
+ }
+
+ // Create a BUILD_VECTOR node.
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops);
+ }
+
+ // If this is a static alloca, generate it as the frameindex instead of
+ // computation.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ return DAG.getFrameIndex(SI->second,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ }
+
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
+ Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ }
+
+ llvm_unreachable("Can't get register for value!");
+}
+
+void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
+ MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
+ // In MSVC C++ and CoreCLR, catch blocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ CatchPadMBB->setIsEHFuncletEntry();
+
+ DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
+}
+
+void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
+ // Update machine-CFG edge.
+ MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
+ FuncInfo.MBB->addSuccessor(TargetMBB);
+
+ auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsSEH = isAsynchronousEHPersonality(Pers);
+ if (IsSEH) {
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (TargetMBB != NextBlock(FuncInfo.MBB) ||
+ TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB)));
+ return;
+ }
+
+ // Figure out the funclet membership for the catchret's successor.
+ // This will be used by the FuncletLayout pass to determine how to order the
+ // BB's.
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = I.getCatchSwitchParentPad();
+ const BasicBlock *SuccessorColor;
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccessorColor = &FuncInfo.Fn->getEntryBlock();
+ else
+ SuccessorColor = cast<Instruction>(ParentPad)->getParent();
+ assert(SuccessorColor && "No parent funclet for catchret!");
+ MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
+ assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
+
+ // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(TargetMBB),
+ DAG.getBasicBlock(SuccessorColorMBB));
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
+ // Don't emit any special code for the cleanuppad instruction. It just marks
+ // the start of a funclet.
+ FuncInfo.MBB->setIsEHFuncletEntry();
+ FuncInfo.MBB->setIsCleanupFuncletEntry();
+}
+
+/// When an invoke or a cleanupret unwinds to the next EH pad, there are
+/// many places it could ultimately go. In the IR, we have a single unwind
+/// destination, but in the machine CFG, we enumerate all the possible blocks.
+/// This function skips over imaginary basic blocks that hold catchswitch
+/// instructions, and finds all the "real" machine
+/// basic block destinations. As those destinations may not be successors of
+/// EHPadBB, here we also calculate the edge probability to those destinations.
+/// The passed-in Prob is the edge probability to EHPadBB.
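+/// For example, an invoke whose unwind edge reaches a catchswitch with two
+/// handlers %catch.a and %catch.b yields both handler blocks as destinations,
+/// and the chain is then followed through the catchswitch's unwind label (if
+/// any) until a landingpad or cleanuppad is reached.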
+static void findUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality =
+ classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ break;
+ } else if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ // For MSVC++ and the CLR, catch blocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+}
+
+void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
+ // Update successor info.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ auto UnwindDest = I.getUnwindDest();
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability UnwindDestProb =
+ (BPI && UnwindDest)
+ ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
+ }
+ FuncInfo.MBB->normalizeSuccProbs();
+
+ // Create the terminator node.
+ SDValue Ret =
+ DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
+ DAG.setRoot(Ret);
+}
+
+void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
+ report_fatal_error("visitCatchSwitch not yet implemented!");
+}
+
+void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
+ SDValue Chain = getControlRoot();
+ SmallVector<ISD::OutputArg, 8> Outs;
+ SmallVector<SDValue, 8> OutVals;
+
+ // Calls to @llvm.experimental.deoptimize don't generate a return value, so
+ // lower
+ //
+ // %val = call <ty> @llvm.experimental.deoptimize()
+ // ret <ty> %val
+ //
+ // differently.
+ if (I.getParent()->getTerminatingDeoptimizeCall()) {
+ LowerDeoptimizingReturn();
+ return;
+ }
+
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
+ DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ SmallVector<SDValue, 4> Chains(NumValues);
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i],
+ getCurSDLoc()),
+ &Flags);
+ Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo());
+ }
+
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
+ MVT::Other, Chains);
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
+
+ const Function *F = I.getParent()->getParent();
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+ if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ LLVMContext &Context = F->getContext();
+ bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ Attribute::InReg);
+
+ for (unsigned j = 0; j != NumValues; ++j) {
+ EVT VT = ValueVTs[j];
+
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+ VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
+
+ unsigned NumParts = TLI.getNumRegisters(Context, VT);
+ MVT PartVT = TLI.getRegisterType(Context, VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurSDLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, &I, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (RetInReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (ExtendKind == ISD::SIGN_EXTEND)
+ Flags.setSExt();
+ else if (ExtendKind == ISD::ZERO_EXTEND)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ VT, /*isfixed=*/true, 0, 0));
+ OutVals.push_back(Parts[i]);
+ }
+ }
+ }
+ }
+
+ // Push the swifterror virtual register in as the last element of Outs. This
+ // makes sure the swifterror virtual register will be returned in the
+ // swifterror physical register.
+ const Function *F = I.getParent()->getParent();
+ if (TLI.supportSwiftError() &&
+ F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ Flags.setSwiftError();
+ Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
+ EVT(TLI.getPointerTy(DL)) /*argvt*/,
+ true /*isfixed*/, 1 /*origidx*/,
+ 0 /*partOffs*/));
+ // Create SDNode for the swifterror virtual register.
+ OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0],
+ EVT(TLI.getPointerTy(DL))));
+ }
+
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CallingConv::ID CallConv =
+ DAG.getMachineFunction().getFunction()->getCallingConv();
+ Chain = DAG.getTargetLoweringInfo().LowerReturn(
+ Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
+
+ // Verify that the target's LowerReturn behaved as expected.
+ assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+ "LowerReturn didn't return a valid chain!");
+
+ // Update the DAG with the new chain value resulting from return lowering.
+ DAG.setRoot(Chain);
+}
+
+/// CopyToExportRegsIfNeeded - If the given value has virtual registers
+/// created for it, emit nodes to copy the value into the virtual
+/// registers.
+void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ return;
+
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ CopyValueToVirtualRegister(V, VMI->second);
+ }
+}
+
+/// ExportFromCurrentBlock - If this condition isn't known to be exported from
+/// the current basic block, add it to ValueMap now so that we'll get a
+/// CopyTo/FromReg.
+void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
+ // No need to export constants.
+ if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
+
+ // Already exported?
+ if (FuncInfo.isExportedInst(V)) return;
+
+ unsigned Reg = FuncInfo.InitializeRegForValue(V);
+ CopyValueToVirtualRegister(V, Reg);
+}
+
+bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
+ const BasicBlock *FromBB) {
+ // The operands of the setcc have to be in this block. We don't know
+ // how to export them from some other block.
+ if (const Instruction *VI = dyn_cast<Instruction>(V)) {
+ // Can export from current BB.
+ if (VI->getParent() == FromBB)
+ return true;
+
+ // Is already exported, noop.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // If this is an argument, we can export it if the BB is the entry block or
+ // if it is already exported.
+ if (isa<Argument>(V)) {
+ if (FromBB == &FromBB->getParent()->getEntryBlock())
+ return true;
+
+ // Otherwise, can only export this if it is already exported.
+ return FuncInfo.isExportedInst(V);
+ }
+
+ // Otherwise, constants can always be exported.
+ return true;
+}
+
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+BranchProbability
+SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ if (!BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(
+ std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return BPI->getEdgeProbability(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI)
+ Src->addSuccessorWithoutProb(Dst);
+ else {
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+ }
+}
+
+static bool InBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
+/// This function emits a branch and is used at the leaves of an OR or an
+/// AND operator tree.
+///
+void
+SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TProb,
+ BranchProbability FProb) {
+ const BasicBlock *BB = CurBB->getBasicBlock();
+
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ // The operands of the cmp have to be in this block. We don't know
+ // how to export them from some other block. If this is the first block
+ // of the sequence, no exporting is needed.
+ if (CurBB == SwitchBB ||
+ (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
+ isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
+ ISD::CondCode Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = getICmpCondCode(IC->getPredicate());
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ }
+
+ CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
+ TBB, FBB, CurBB, TProb, FProb);
+ SwitchCases.push_back(CB);
+ return;
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ nullptr, TBB, FBB, CurBB, TProb, FProb);
+ SwitchCases.push_back(CB);
+}
+
+/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y),
+/// recursively decompose it and emit a branch for each leaf condition by way
+/// of EmitBranchForMergedCondition.
+void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc,
+ BranchProbability TProb,
+ BranchProbability FProb) {
+ // If this node is not part of the or/and tree, emit it as a branch.
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+ (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+ !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+ TProb, FProb);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
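+ //
+ // For example, with A = 0.6 and B = 0.4, BB1 gets probabilities 0.3/0.7 and
+ // TmpBB gets 3/7 and 4/7 after normalization; indeed
+ // 0.3 + 0.7 * (3/7) = 0.6 = A, matching the requirement above.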
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+ NewTrueProb, NewFalseProb);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ Probs[0], Probs[1]);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
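+ //
+ // For example, with A = 0.6 and B = 0.4, BB1 gets probabilities 0.8/0.2 and
+ // TmpBB gets 0.75/0.25 after normalization; indeed
+ // 0.2 + 0.8 * 0.25 = 0.4 = B, matching the requirement above.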
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+ NewTrueProb, NewFalseProb);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+ Probs[0], Probs[1]);
+ }
+}
+
+/// If the set of cases should be emitted as a series of branches, return true.
+/// If we should emit this as a bunch of and/or'd together conditions, return
+/// false.
+bool
+SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
+ if (Cases.size() != 2) return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].CC == Cases[1].CC &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
+}
+
+void SelectionDAGBuilder::visitBr(const BranchInst &I) {
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges.
+ MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
+
+ if (I.isUnconditional()) {
+ // Update machine-CFG edges.
+ BrMBB->addSuccessor(Succ0MBB);
+
+ // If this is not a fall-through branch or optimizations are switched off,
+ // emit the branch.
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Succ0MBB)));
+
+ return;
+ }
+
+ // If this condition is one of the special cases we handle, do special stuff
+ // now.
+ const Value *CondVal = I.getCondition();
+ MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
+
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ //
+ if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+ Instruction::BinaryOps Opcode = BOp->getOpcode();
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
+ !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ (Opcode == Instruction::And || Opcode == Instruction::Or)) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
+ Opcode,
+ getEdgeProbability(BrMBB, Succ0MBB),
+ getEdgeProbability(BrMBB, Succ1MBB));
+ // If the compares in later blocks need to use values not currently
+ // exported from this block, export them now. This block should always
+ // be the first entry.
+ assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (ShouldEmitAsBranches(SwitchCases)) {
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ }
+
+ // Emit the branch for this block.
+ visitSwitchCase(SwitchCases[0], BrMBB);
+ SwitchCases.erase(SwitchCases.begin());
+ return;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+
+ SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
+ nullptr, Succ0MBB, Succ1MBB, BrMBB);
+
+ // Use visitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ visitSwitchCase(CB, BrMBB);
+}
+
+/// visitSwitchCase - Emits the necessary code to represent a single node in
+/// the binary search tree resulting from lowering a switch instruction.
+void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB) {
+ SDValue Cond;
+ SDValue CondLHS = getValue(CB.CmpLHS);
+ SDLoc dl = getCurSDLoc();
+
+ // Build the setcc now.
+ if (!CB.CmpMHS) {
+ // Fold "(X == true)" to X and "(X == false)" to !X to
+ // handle common cases produced by branch lowering.
+ if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ)
+ Cond = CondLHS;
+ else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
+ CB.CC == ISD::SETEQ) {
+ SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
+ } else
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ SDValue CmpOp = getValue(CB.CmpMHS);
+ EVT VT = CmpOp.getValueType();
+
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
+ ISD::SETLE);
+ } else {
+ SDValue SUB = DAG.getNode(ISD::SUB, dl,
+ VT, CmpOp, DAG.getConstant(Low, dl, VT));
+ Cond = DAG.getSetCC(dl, MVT::i1, SUB,
+ DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
+ SwitchBB->normalizeSuccProbs();
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == NextBlock(SwitchBB)) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
+ Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
+ }
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(CB.TrueBB));
+
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// visitJumpTable - Emit JumpTable node in the current MBB
+void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
+ JT.Reg, PTy);
+ SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
+ SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
+ MVT::Other, Index.getValue(1),
+ Table, Index);
+ DAG.setRoot(BrJumpTable);
+}
+
+/// visitJumpTableHeader - This function emits the code necessary to produce an
+/// index into the jump table from the value being switched on.
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+ JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
+ // Subtract the lowest switch case value from the value being switched on and
+ // conditional branch to default mbb if the result is greater than the
+ // difference between smallest and largest cases.
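+ //
+ // For example, for cases covering [10, 20] this emits, in effect,
+ // Index = Value - 10, followed by an unsigned comparison Index > 10 that
+ // branches to the default block; otherwise Index selects the jump table entry.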
+ SDValue SwitchOp = getValue(JTH.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+ DAG.getConstant(JTH.First, dl, VT));
+
+ // The SDNode we just created, which holds the value being switched on minus
+ // the smallest case value, needs to be copied to a virtual register so it
+ // can be used as an index into the jump table in a subsequent basic block.
+ // This value may be smaller or larger than the target's pointer type, and
+ // may therefore require extension or truncation.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
+
+ unsigned JumpTableReg =
+ FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
+ JumpTableReg, SwitchOp);
+ JT.Reg = JumpTableReg;
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the largest
+ // case in the switch.
+ SDValue CMP = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+}
+
+/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
+/// variable if one exists.
+static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue &Chain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
+ MachineSDNode *Node =
+ DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
+ if (Global) {
+ MachinePointerInfo MPInfo(Global);
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
+ DAG.getEVTAlignment(PtrTy));
+ Node->setMemRefs(MemRefs, MemRefs + 1);
+ }
+ return SDValue(Node, 0);
+}
+
+/// Codegen a new tail for a stack protector check ParentMBB which has had its
+/// tail spliced into a stack protector check success bb.
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+
+ MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI->getStackProtectorIndex();
+
+ SDValue Guard;
+ SDLoc dl = getCurSDLoc();
+ SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
+ const Module &M = *ParentBB->getParent()->getFunction()->getParent();
+ unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ SDValue StackSlot = DAG.getLoad(
+ PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
+ MachineMemOperand::MOVolatile);
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ auto *Fn = cast<Function>(GuardCheck);
+ FunctionType *FnTy = Fn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = StackSlot;
+ Entry.Ty = FnTy->getParamType(0);
+ if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Entry.isInReg = true;
+ Args.push_back(Entry);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(DAG.getEntryNode())
+ .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
+ getValue(GuardCheck), std::move(Args));
+
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return;
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ SDValue Chain = DAG.getEntryNode();
+ if (TLI.useLoadStackGuardNode()) {
+ Guard = getLoadStackGuard(DAG, dl, Chain);
+ } else {
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ SDValue GuardPtr = getValue(IRGuard);
+
+ Guard =
+ DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
+ Align, MachineMemOperand::MOVolatile);
+ }
+
+ // Perform the comparison via a subtract/getsetcc.
+ EVT VT = Guard.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
+
+ SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
+
+ // If the sub is not 0, then we know the guard and stack slot values are not
+ // equal, so branch to the failure MBB.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, StackSlot.getOperand(0),
+ Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
+ // Otherwise branch to success MBB.
+ SDValue Br = DAG.getNode(ISD::BR, dl,
+ MVT::Other, BrCond,
+ DAG.getBasicBlock(SPD.getSuccessMBB()));
+
+ DAG.setRoot(Br);
+}
+
+/// Codegen the failure basic block for a stack protector check.
+///
+/// A failure stack protector machine basic block consists simply of a call to
+/// __stack_chk_fail().
+///
+/// For a high level explanation of how this fits into the stack protector
+/// generation see the comment on the declaration of class
+/// StackProtectorDescriptor.
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain =
+ TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
+ None, false, getCurSDLoc(), false, false).second;
+ DAG.setRoot(Chain);
+}
+
+/// visitBitTestHeader - This function emits the code necessary to produce a
+/// value suitable for "bit tests".
+void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+
+ // Subtract the minimum value
+ SDValue SwitchOp = getValue(B.SValue);
+ EVT VT = SwitchOp.getValueType();
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
+ DAG.getConstant(B.First, dl, VT));
+
+ // Check range
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue RangeCmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
+
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use the pointer type; it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy(DAG.getDataLayout());
+ Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
+ }
+
+ B.RegVT = VT.getSimpleVT();
+ B.Reg = FuncInfo.CreateReg(B.RegVT);
+ SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
+
+ MachineBasicBlock* MBB = B.Cases[0].ThisBB;
+
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+ SwitchBB->normalizeSuccProbs();
+
+ SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, CopyTo, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != NextBlock(SwitchBB))
+ BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
+ DAG.getBasicBlock(MBB));
+
+ DAG.setRoot(BrRange);
+}
+
+/// visitBitTestCase - This function produces one "bit test".
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ BranchProbability BranchProbToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ SDLoc dl = getCurSDLoc();
+ MVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
+ SDValue Cmp;
+ unsigned PopCount = countPopulation(B.Mask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
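+ // For example, for Mask == 0x10 the test becomes (ShiftOp == 4).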
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
+ ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
+ ISD::SETNE);
+ } else {
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
+ DAG.getConstant(1, dl, VT), ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, dl,
+ VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
+ Cmp = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
+ AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one,
+ // as they are relative probabilities (and thus work more like weights),
+ // so we need to normalize them so that they sum to one.
+ SwitchBB->normalizeSuccProbs();
+
+ SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, getControlRoot(),
+ Cmp, DAG.getBasicBlock(B.TargetBB));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != NextBlock(SwitchBB))
+ BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
+ DAG.getBasicBlock(NextMBB));
+
+ DAG.setRoot(BrAnd);
+}
+
+void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
+
+ // Retrieve successors. Look through artificial IR level blocks like
+ // catchswitch for successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower invokes with arbitrary operand bundles yet!");
+
+ const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ visitInlineAsm(&I);
+ else if (Fn && Fn->isIntrinsic()) {
+ switch (Fn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Cannot invoke this intrinsic");
+ case Intrinsic::donothing:
+ // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ visitPatchpoint(&I, EHPadBB);
+ break;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
+ break;
+ }
+ } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
+ // Currently we do not lower any intrinsic calls with deopt operand bundles.
+ // Eventually we will support lowering the @llvm.experimental.deoptimize
+ // intrinsic, and right now there are no plans to support other intrinsics
+ // with deopt state.
+ LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
+ } else {
+ LowerCallTo(&I, getValue(Callee), false, EHPadBB);
+ }
+
+ // If the value of the invoke is used outside of its defining block, make it
+ // available as a virtual register.
+ // We already took care of the exported value for the statepoint instruction
+ // during the call to LowerStatepoint.
+ if (!isStatepoint(I)) {
+ CopyToExportRegsIfNeeded(&I);
+ }
+
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+ findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
+
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, Return);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
+ // Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+ llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+ assert(FuncInfo.MBB->isEHPad() &&
+ "Call to landingpad not in landing pad!");
+
+ MachineBasicBlock *MBB = FuncInfo.MBB;
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ AddLandingPadInfo(LP, MMI, MBB);
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return;
+
+ // If the landingpad's return type is a token type, we don't create DAG nodes
+ // for its exception pointer and selector values. Extracting the exception
+ // pointer or selector value from a token-typed landingpad is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
+ return;
+
+ SmallVector<EVT, 2> ValueVTs;
+ SDLoc dl = getCurSDLoc();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
+ assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
+
+ // Get the two live-in registers as SDValues. The physregs have already been
+ // copied into virtual registers.
+ SDValue Ops[2];
+ if (FuncInfo.ExceptionPointerVirtReg) {
+ Ops[0] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionPointerVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ dl, ValueVTs[0]);
+ } else {
+ Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ Ops[1] = DAG.getZExtOrTrunc(
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ FuncInfo.ExceptionSelectorVirtReg,
+ TLI.getPointerTy(DAG.getDataLayout())),
+ dl, ValueVTs[1]);
+
+ // Merge into one.
+ SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(ValueVTs), Ops);
+ setValue(&LP, Res);
+}
+
+void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
+#ifndef NDEBUG
+ for (const CaseCluster &CC : Clusters)
+ assert(CC.Low == CC.High && "Input clusters must be single-case");
+#endif
+
+ std::sort(Clusters.begin(), Clusters.end(),
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Merge adjacent clusters with the same destination.
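+ // For example, the single-case clusters 1->BB_A, 2->BB_A, 3->BB_B are merged
+ // into [1, 2]->BB_A and [3, 3]->BB_B, with the probabilities of merged cases
+ // summed.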
+ const unsigned N = Clusters.size();
+ unsigned DstIndex = 0;
+ for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
+ CaseCluster &CC = Clusters[SrcIndex];
+ const ConstantInt *CaseVal = CC.Low;
+ MachineBasicBlock *Succ = CC.MBB;
+
+ if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
+ (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
+ // If this case has the same successor and is a neighbour, merge it into
+ // the previous cluster.
+ Clusters[DstIndex - 1].High = CaseVal;
+ Clusters[DstIndex - 1].Prob += CC.Prob;
+ } else {
+ std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
+ sizeof(Clusters[SrcIndex]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+ // Update machine-CFG edges with unique successors.
+ SmallSet<BasicBlock*, 32> Done;
+ for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+ BasicBlock *BB = I.getSuccessor(i);
+ bool Inserted = Done.insert(BB).second;
+ if (!Inserted)
+ continue;
+
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
+ addSuccessorWithProb(IndirectBrMBB, Succ);
+ }
+ IndirectBrMBB->normalizeSuccProbs();
+
+ DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
+ MVT::Other, getControlRoot(),
+ getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
+ if (DAG.getTarget().Options.TrapUnreachable)
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+ // -0.0 - X --> fneg
+ Type *Ty = I.getType();
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
+ Op2.getValueType(), Op2));
+ return;
+ }
+
+ visitBinary(I, ISD::FSUB);
+}
+
+/// Checks if the given instruction performs a vector reduction, in which case
+/// we have the freedom to alter the elements in the result as long as the
+/// reduction of them stays unchanged.
+static bool isVectorReductionOp(const User *I) {
+ const Instruction *Inst = dyn_cast<Instruction>(I);
+ if (!Inst || !Inst->getType()->isVectorTy())
+ return false;
+
+ auto OpCode = Inst->getOpcode();
+ switch (OpCode) {
+ case Instruction::Add:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+ if (FPOp->getFastMathFlags().unsafeAlgebra())
+ break;
+ // Fall through.
+ default:
+ return false;
+ }
+
+ unsigned ElemNum = Inst->getType()->getVectorNumElements();
+ unsigned ElemNumToReduce = ElemNum;
+
+ // Do DFS search on the def-use chain from the given instruction. We only
+ // allow four kinds of operations during the search until we reach the
+ // instruction that extracts the first element from the vector:
+ //
+ // 1. The reduction operation of the same opcode as the given instruction.
+ //
+ // 2. PHI node.
+ //
+ // 3. ShuffleVector instruction together with a reduction operation that
+ // does a partial reduction.
+ //
+ // 4. ExtractElement that extracts the first element from the vector, and we
+ // stop searching the def-use chain here.
+ //
+ // 3 & 4 above perform a reduction on all elements of the vector. We push defs
+ // from 1-3 to the stack to continue the DFS. The given instruction is not
+ // a reduction operation if we meet any other instructions other than those
+ // listed above.
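+ //
+ // For illustration, a fully reduced <4 x i32> add typically feeds a tail of
+ // the following shape (names are made up):
+ //
+ // %rdx.shuf = shufflevector <4 x i32> %sum, <4 x i32> undef,
+ //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = add <4 x i32> %sum, %rdx.shuf
+ // %rdx.shuf1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef,
+ //                            <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx1 = add <4 x i32> %bin.rdx, %rdx.shuf1
+ // %res = extractelement <4 x i32> %bin.rdx1, i32 0
+ //
+ // Starting the DFS at the add producing %sum, the two shuffles halve
+ // ElemNumToReduce from 4 to 1, and the final extract of element 0 marks the
+ // reduction as recognized.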
+
+ SmallVector<const User *, 16> UsersToVisit{Inst};
+ SmallPtrSet<const User *, 16> Visited;
+ bool ReduxExtracted = false;
+
+ while (!UsersToVisit.empty()) {
+ auto User = UsersToVisit.back();
+ UsersToVisit.pop_back();
+ if (!Visited.insert(User).second)
+ continue;
+
+ for (const auto &U : User->users()) {
+ auto Inst = dyn_cast<Instruction>(U);
+ if (!Inst)
+ return false;
+
+ if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
+ if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
+ return false;
+ UsersToVisit.push_back(U);
+ } else if (const ShuffleVectorInst *ShufInst =
+ dyn_cast<ShuffleVectorInst>(U)) {
+ // Detect the following pattern: a ShuffleVector instruction together
+ // with a reduction operation that does a partial reduction over the first
+ // and second halves (ElemNumToReduce / 2 elements each), storing the result
+ // in the first ElemNumToReduce / 2 elements of another vector.
+
+ unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
+ if (ResultElements < ElemNum)
+ return false;
+
+ if (ElemNumToReduce == 1)
+ return false;
+ if (!isa<UndefValue>(U->getOperand(1)))
+ return false;
+ for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
+ if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
+ return false;
+ for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
+ if (ShufInst->getMaskValue(i) != -1)
+ return false;
+
+ // The ShuffleVector instruction must have exactly one user, and that user
+ // must be the reduction operation.
+ if (!U->hasOneUse())
+ return false;
+
+ auto U2 = dyn_cast<Instruction>(*U->user_begin());
+ if (!U2 || U2->getOpcode() != OpCode)
+ return false;
+
+ // Check operands of the reduction operation.
+ if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
+ (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
+ UsersToVisit.push_back(U2);
+ ElemNumToReduce /= 2;
+ } else
+ return false;
+ } else if (isa<ExtractElementInst>(U)) {
+ // At this moment we should have reduced all elements in the vector.
+ if (ElemNumToReduce != 1)
+ return false;
+
+ const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
+ if (!Val || Val->getZExtValue() != 0)
+ return false;
+
+ ReduxExtracted = true;
+ } else
+ return false;
+ }
+ }
+ return ReduxExtracted;
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+ bool vec_redux = false;
+ FastMathFlags FMF;
+
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+ if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
+ FMF = FPOp->getFastMathFlags();
+
+ if (isVectorReductionOp(&I)) {
+ vec_redux = true;
+ DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+ }
+
+ SDNodeFlags Flags;
+ Flags.setExact(exact);
+ Flags.setNoSignedWrap(nsw);
+ Flags.setNoUnsignedWrap(nuw);
+ Flags.setVectorReduction(vec_redux);
+ if (EnableFMFInDAG) {
+ Flags.setAllowReciprocal(FMF.allowReciprocal());
+ Flags.setNoInfs(FMF.noInfs());
+ Flags.setNoNaNs(FMF.noNaNs());
+ Flags.setNoSignedZeros(FMF.noSignedZeros());
+ Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+ }
+ SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
+ Op1, Op2, &Flags);
+ setValue(&I, BinNodeValue);
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
+ Op2.getValueType(), DAG.getDataLayout());
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ SDLoc DL = getCurSDLoc();
+
+ // If the operand is smaller than the shift count type, promote it.
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+ // If the operand is larger than the shift count type but the shift
+ // count type has enough bits to represent any shift value, truncate
+ // it now. This is a common case and it exposes the truncate to
+ // optimization early.
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ }
+
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+
+ if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+ }
+ SDNodeFlags Flags;
+ Flags.setExact(exact);
+ Flags.setNoSignedWrap(nsw);
+ Flags.setNoUnsignedWrap(nuw);
+ SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+ &Flags);
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ SDNodeFlags Flags;
+ Flags.setExact(isa<PossiblyExactOperator>(&I) &&
+ cast<PossiblyExactOperator>(&I)->isExact());
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
+ Op2, &Flags));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+ ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+ predicate = IC->getPredicate();
+ else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+ predicate = ICmpInst::Predicate(IC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+ FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ predicate = FC->getPredicate();
+ else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+ predicate = FCmpInst::Predicate(FC->getPredicate());
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+ ISD::CondCode Condition = getFCmpCondCode(predicate);
+
+ // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
+ // FIXME: We should propagate the fast-math-flags to the DAG node itself for
+ // further optimization, but currently FMF is only applicable to binary nodes.
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
+}
+
+// Check that the condition of the select is used only by select instructions,
+// so that converting to a min/max does not leave the compare alive for any
+// other user.
+static bool hasOnlySelectUsers(const Value *Cond) {
+ return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) {
+ return isa<SelectInst>(V);
+ });
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+ ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SDValue Cond = getValue(I.getOperand(0));
+ SDValue LHSVal = getValue(I.getOperand(1));
+ SDValue RHSVal = getValue(I.getOperand(2));
+ auto BaseOps = {Cond};
+ ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+ ISD::VSELECT : ISD::SELECT;
+
+ // Min/max matching is only viable if all output VTs are the same.
+ if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
+ EVT VT = ValueVTs[0];
+ LLVMContext &Ctx = *DAG.getContext();
+ auto &TLI = DAG.getTargetLoweringInfo();
+
+ // We care about the legality of the operation after it has been type
+ // legalized.
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
+ VT != TLI.getTypeToTransformTo(Ctx, VT))
+ VT = TLI.getTypeToTransformTo(Ctx, VT);
+
+ // If the vselect is legal, assume we want to leave this as a vector setcc +
+ // vselect. Otherwise, if this is going to be scalarized, we want to see if
+ // min/max is legal on the scalar type.
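+ // For example, a select on <4 x float> that will be scalarized is matched
+ // against FMINNUM/FMAXNUM legality on f32 rather than on v4f32.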
+ bool UseScalarMinMax = VT.isVector() &&
+ !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+
+ Value *LHS, *RHS;
+ auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
+ ISD::NodeType Opc = ISD::DELETED_NODE;
+ switch (SPR.Flavor) {
+ case SPF_UMAX: Opc = ISD::UMAX; break;
+ case SPF_UMIN: Opc = ISD::UMIN; break;
+ case SPF_SMAX: Opc = ISD::SMAX; break;
+ case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_FMINNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_ANY: {
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ Opc = ISD::FMINNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
+ Opc = ISD::FMINNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
+ ISD::FMINNUM : ISD::FMINNAN;
+ break;
+ }
+ }
+ break;
+ case SPF_FMAXNUM:
+ switch (SPR.NaNBehavior) {
+ case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
+ case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
+ case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_ANY:
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ Opc = ISD::FMAXNUM;
+ else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
+ Opc = ISD::FMAXNAN;
+ else if (UseScalarMinMax)
+ Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
+ ISD::FMAXNUM : ISD::FMAXNAN;
+ break;
+ }
+ break;
+ default: break;
+ }
+
+ if (Opc != ISD::DELETED_NODE &&
+ (TLI.isOperationLegalOrCustom(Opc, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
+ // If the underlying comparison instruction is used by any other
+ // instruction, the consumed instructions won't be destroyed, so it is
+ // not profitable to convert to a min/max.
+ hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ RHSVal = getValue(RHS);
+ BaseOps = {};
+ }
+ }
+
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+ Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+ Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
+ Ops);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+ // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+ // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason, so there is
+  // nothing much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+ // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason, so there is
+  // nothing much to do here.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+ // FPTrunc is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
+ DAG.getTargetConstant(
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I) {
+ // FPExt is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+ // FPToUI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+ // FPToSI is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+ // UIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I) {
+ // SIToFP is never a no-op cast, no need to check
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
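+  // E.g. with 64-bit pointers, a ptrtoint to i32 truncates, to i128 zero
+  // extends, and to i64 is a no-op copy of the value.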
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+ // What to do depends on the size of the integer and the size of the pointer.
+ // We can either truncate, zero extend, or no-op, accordingly.
+ SDValue N = getValue(I.getOperand(0));
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+ setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+ SDValue N = getValue(I.getOperand(0));
+ SDLoc dl = getCurSDLoc();
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType());
+
+ // BitCast assures us that source and destination are the same size so this is
+ // either a BITCAST or a no-op.
+ if (DestVT != N.getValueType())
+ setValue(&I, DAG.getNode(ISD::BITCAST, dl,
+ DestVT, N)); // convert types.
+  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
+  // might fold any kind of constant expression to an integer constant and that
+  // is not what we are looking for. Only recognize a bitcast of a genuine
+  // constant integer as an opaque constant.
+  else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
+ setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
+ /*isOpaque*/true));
+ else
+ setValue(&I, N); // noop cast.
+}
+
+void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ SDValue N = getValue(SV);
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ unsigned SrcAS = SV->getType()->getPointerAddressSpace();
+ unsigned DestAS = I.getType()->getPointerAddressSpace();
+
+ if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InVal = getValue(I.getOperand(1));
+ SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue InVec = getValue(I.getOperand(0));
+ SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
+ TLI.getVectorIdxTy(DAG.getDataLayout()));
+ setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ InVec, InIdx));
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+ SDValue Src1 = getValue(I.getOperand(0));
+ SDValue Src2 = getValue(I.getOperand(1));
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT SrcVT = Src1.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+ if (SrcNumElts == MaskNumElts) {
+ setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask));
+ return;
+ }
+
+ // Normalize the shuffle vector since mask and vector length don't match.
+ if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+ // Mask is longer than the source vectors and is a multiple of the source
+ // vectors. We can use concatenate vector to make the mask and vectors
+ // lengths match.
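+    // For example, shuffling two <2 x i32> sources with mask <0,1,2,3> is
+    // exactly concat(Src1, Src2). A mask such as <0,3,1,2> is not a
+    // concatenation, so both sources are padded with undef up to the mask
+    // length and the indices that referred to Src2 are shifted to the start
+    // of the widened second operand (3 becomes 5 here).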
+
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+
+ // Check if the shuffle is some kind of concatenation of the input vectors.
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
+ }
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
+ }
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
+ VT, ConcatOps));
+ return;
+ }
+
+ // Pad both vectors with undefs to make them the same length as the mask.
+ SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+ SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+ SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+ MOps1[0] = Src1;
+ MOps2[0] = Src2;
+
+ Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
+ : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurSDLoc(), VT, MOps1);
+ Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
+ : DAG.getNode(ISD::CONCAT_VECTORS,
+ getCurSDLoc(), VT, MOps2);
+
+ // Readjust mask for new input vector length.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
+ MappedOps));
+ return;
+ }
+
+ if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses in both vectors.
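+    // For example, an <8 x i32> source shuffled by the 4-element mask
+    // <4,5,6,7> only touches the upper half, so we can extract the subvector
+    // starting at element 4 and rewrite the mask as <0,1,2,3>.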
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
+ int MaxRange[2] = {-1, -1};
+
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ unsigned Input = 0;
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= (int)SrcNumElts) {
+ Input = 1;
+ Idx -= SrcNumElts;
+ }
+ if (Idx > MaxRange[Input])
+ MaxRange[Input] = Idx;
+ if (Idx < MinRange[Input])
+ MinRange[Input] = Idx;
+ }
+
+    // Check if the access is smaller than the vector size and whether we can
+    // find a reasonable extract index.
+    int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract,
+                                  // -1 = Can not extract.
+ int StartIdx[2]; // StartIdx to extract from
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
+ RangeUse[Input] = 0; // Unused
+ StartIdx[Input] = 0;
+ continue;
+ }
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ }
+
+ if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+ return;
+ }
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ // Extract appropriate subvector and generate a vector shuffle
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ SDValue &Src = Input == 0 ? Src1 : Src2;
+ if (RangeUse[Input] == 0)
+ Src = DAG.getUNDEF(VT);
+ else {
+ SDLoc dl = getCurSDLoc();
+ Src = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, Src,
+ DAG.getConstant(StartIdx[Input], dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+
+ // Calculate new mask.
+ SmallVector<int, 8> MappedOps;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
+ }
+
+ setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
+ MappedOps));
+ return;
+ }
+ }
+
+  // We can't use either concat vectors or extract subvectors, so fall back to
+  // replacing the shuffle with extracts and a build vector.
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDLoc dl = getCurSDLoc();
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ SDValue Res;
+
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
+
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ EltVT, Src, DAG.getConstant(Idx, dl, IdxVT));
+ }
+
+ Ops.push_back(Res);
+ }
+
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ const Value *Op1 = I.getOperand(1);
+ Type *AggTy = I.getType();
+ Type *ValTy = Op1->getType();
+ bool IntoUndef = isa<UndefValue>(Op0);
+ bool FromUndef = isa<UndefValue>(Op1);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> AggValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
+
+ unsigned NumAggValues = AggValueVTs.size();
+ unsigned NumValValues = ValValueVTs.size();
+ SmallVector<SDValue, 4> Values(NumAggValues);
+
+ // Ignore an insertvalue that produces an empty object
+ if (!NumAggValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SDValue Agg = getValue(Op0);
+ unsigned i = 0;
+ // Copy the beginning value(s) from the original aggregate.
+ for (; i != LinearIndex; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+ // Copy values from the inserted value(s).
+ if (NumValValues) {
+ SDValue Val = getValue(Op1);
+ for (; i != LinearIndex + NumValValues; ++i)
+ Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+ }
+ // Copy remaining value(s) from the original aggregate.
+ for (; i != NumAggValues; ++i)
+ Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(AggValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ const Value *Op0 = I.getOperand(0);
+ Type *AggTy = Op0->getType();
+ Type *ValTy = I.getType();
+ bool OutOfUndef = isa<UndefValue>(Op0);
+
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> ValValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
+
+ unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object.
+ if (!NumValValues) {
+ setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+ return;
+ }
+
+ SmallVector<SDValue, 4> Values(NumValValues);
+
+ SDValue Agg = getValue(Op0);
+ // Copy out the selected value(s).
+ for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+ Values[i - LinearIndex] =
+ OutOfUndef ?
+ DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+ SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ValValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+ Value *Op0 = I.getOperand(0);
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
+ SDValue N = getValue(Op0);
+ SDLoc dl = getCurSDLoc();
+
+ // Normalize Vector GEP - all scalar operands should be converted to the
+ // splat vector.
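+  // For example, for
+  //   %p = getelementptr i32, i32* %base, <4 x i64> %idx
+  // the scalar %base is splatted into a 4-element BUILD_VECTOR so the address
+  // arithmetic below happens lane-wise.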
+ unsigned VectorWidth = I.getType()->isVectorTy() ?
+ cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+
+ if (VectorWidth && !N.getValueType().isVector()) {
+ LLVMContext &Context = *DAG.getContext();
+ EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
+ SmallVector<SDValue, 16> Ops(VectorWidth, N);
+ N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ }
+ for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = dyn_cast<StructType>(*GTI)) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ if (Field) {
+ // N = N + Offset
+ uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+
+        // In an inbounds GEP with an offset that is nonnegative even when
+        // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
+ DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
+ }
+ } else {
+ MVT PtrTy =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
+ unsigned PtrSize = PtrTy.getSizeInBits();
+ APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ const auto *CI = dyn_cast<ConstantInt>(Idx);
+ if (!CI && isa<ConstantDataVector>(Idx) &&
+ cast<ConstantDataVector>(Idx)->getSplatValue())
+ CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+
+ if (CI) {
+ if (CI->isZero())
+ continue;
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ SDValue OffsVal = VectorWidth ?
+ DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, PtrTy);
+
+          // In an inbounds GEP with an offset that is nonnegative even when
+          // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
+ continue;
+ }
+
+ // N = N + Idx * ElementSize;
+ SDValue IdxN = getValue(Idx);
+
+ if (!IdxN.getValueType().isVector() && VectorWidth) {
+ MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
+ SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
+ IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ }
+ // If the index is smaller or larger than intptr_t, truncate or extend
+ // it.
+ IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
+
+ // If this is a multiply by a power of two, turn it into a shl
+ // immediately. This is a very common case.
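+        // E.g. an element size of 8 bytes becomes "IdxN << 3".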
+ if (ElementSize != 1) {
+ if (ElementSize.isPowerOf2()) {
+ unsigned Amt = ElementSize.logBase2();
+ IdxN = DAG.getNode(ISD::SHL, dl,
+ N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ } else {
+ SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
+ IdxN = DAG.getNode(ISD::MUL, dl,
+ N.getValueType(), IdxN, Scale);
+ }
+ }
+
+ N = DAG.getNode(ISD::ADD, dl,
+ N.getValueType(), N, IdxN);
+ }
+ }
+
+ setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+ // If this is a fixed sized alloca in the entry block of the function,
+ // allocate it statically on the stack.
+ if (FuncInfo.StaticAllocaMap.count(&I))
+ return; // getValue will auto-populate this.
+
+ SDLoc dl = getCurSDLoc();
+ Type *Ty = I.getAllocatedType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align =
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
+
+ SDValue AllocSize = getValue(I.getArraySize());
+
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, dl, IntPtr));
+
+  // Handle alignment. If the requested alignment is less than or equal to
+  // the stack alignment, ignore it. If the requested alignment is greater
+  // than the stack alignment, we note it in the DYNAMIC_STACKALLOC node.
+ unsigned StackAlign =
+ DAG.getSubtarget().getFrameLowering()->getStackAlignment();
+ if (Align <= StackAlign)
+ Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size. This doesn't overflow because we're computing
+  // an address inside an alloca.
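+  // For example, with a 16-byte stack alignment a request for 20 bytes is
+  // rounded up as (20 + 15) & ~15 = 32.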
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+ AllocSize = DAG.getNode(ISD::ADD, dl,
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
+
+ // Mask out the low bits for alignment purposes.
+ AllocSize = DAG.getNode(ISD::AND, dl,
+ AllocSize.getValueType(), AllocSize,
+ DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
+ dl));
+
+ SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
+ SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
+ setValue(&I, DSA);
+ DAG.setRoot(DSA.getValue(1));
+
+ assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+ if (I.isAtomic())
+ return visitAtomicLoad(I);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const Value *SV = I.getOperand(0);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitLoadFromSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+ if (Alloca->isSwiftError())
+ return visitLoadFromSwiftError(I);
+ }
+ }
+
+ SDValue Ptr = getValue(SV);
+
+ Type *Ty = I.getType();
+
+ bool isVolatile = I.isVolatile();
+ bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
+
+ // The IR notion of invariant_load only guarantees that all *non-faulting*
+ // invariant loads result in the same value. The MI notion of invariant load
+ // guarantees that the load can be legally moved to any location within its
+ // containing function. The MI notion of invariant_load is stronger than the
+ // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load
+ // with a guarantee that the location being loaded from is dereferenceable
+ // throughout the function's lifetime.
+
+ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
+ isDereferenceablePointer(SV, DAG.getDataLayout());
+ unsigned Alignment = I.getAlignment();
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+ SDValue Root;
+ bool ConstantMemory = false;
+ if (isVolatile || NumValues > MaxParallelChains)
+ // Serialize volatile loads with other side effects.
+ Root = getRoot();
+ else if (AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = DAG.getRoot();
+ }
+
+ SDLoc dl = getCurSDLoc();
+
+ if (isVolatile)
+ Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
+
+ // An aggregate load cannot wrap around the address space, so offsets to its
+ // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+    // (MaxParallelChains should always remain as a failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue A = DAG.getNode(ISD::ADD, dl,
+ PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], dl, PtrVT),
+ &Flags);
+ auto MMOFlags = MachineMemOperand::MONone;
+ if (isVolatile)
+ MMOFlags |= MachineMemOperand::MOVolatile;
+ if (isNonTemporal)
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ MMOFlags |= MachineMemOperand::MOInvariant;
+
+ SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+ MachinePointerInfo(SV, Offsets[i]), Alignment,
+ MMOFlags, AAInfo, Ranges);
+
+ Values[i] = L;
+ Chains[ChainI] = L.getValue(1);
+ }
+
+ if (!ConstantMemory) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ if (isVolatile)
+ DAG.setRoot(Chain);
+ else
+ PendingLoads.push_back(Chain);
+ }
+
+ setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(ValueVTs), Values));
+}
+
+void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ assert(TLI.supportSwiftError() &&
+ "call visitStoreToSwiftError when backend supports swifterror");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ const Value *SrcV = I.getOperand(0);
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &Offsets);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ SDValue Src = getValue(SrcV);
+  // Create a virtual register, then copy the swifterror value into it.
+ auto &DL = DAG.getDataLayout();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
+ // Chain can be getRoot or getControlRoot.
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
+ SDValue(Src.getNode(), Src.getResNo()));
+ DAG.setRoot(CopyNode);
+ FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+}
+
+void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
+ assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
+ "call visitLoadFromSwiftError when backend supports swifterror");
+
+  assert(!I.isVolatile() &&
+         I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
+         I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
+         "volatile, non-temporal and invariant loads are not supported "
+         "for load_from_swift_error");
+
+ const Value *SV = I.getOperand(0);
+ Type *Ty = I.getType();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ assert(!AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) &&
+ "load_from_swift_error should not be constant memory");
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
+ ValueVTs, &Offsets);
+ assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
+ "expect a single EVT for swifterror");
+
+ // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
+ SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(),
+ FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV),
+ ValueVTs[0]);
+
+ setValue(&I, L);
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+ if (I.isAtomic())
+ return visitAtomicStore(I);
+
+ const Value *SrcV = I.getOperand(0);
+ const Value *PtrV = I.getOperand(1);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return visitStoreToSwiftError(I);
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+ if (Alloca->isSwiftError())
+ return visitStoreToSwiftError(I);
+ }
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
+ SrcV->getType(), ValueVTs, &Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ return;
+
+  // Get the lowered operands. Note that we do this after
+  // checking if NumValues is zero, because with zero values
+  // the operands won't have entries in the map.
+ SDValue Src = getValue(SrcV);
+ SDValue Ptr = getValue(PtrV);
+
+ SDValue Root = getRoot();
+ SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
+ SDLoc dl = getCurSDLoc();
+ EVT PtrVT = Ptr.getValueType();
+ unsigned Alignment = I.getAlignment();
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ auto MMOFlags = MachineMemOperand::MONone;
+ if (I.isVolatile())
+ MMOFlags |= MachineMemOperand::MOVolatile;
+ if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+ MMOFlags |= MachineMemOperand::MONonTemporal;
+
+  // An aggregate store cannot wrap around the address space, so offsets to its
+  // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ Root = Chain;
+ ChainI = 0;
+ }
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
+ DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
+ SDValue St = DAG.getStore(
+ Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
+ MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
+ DAG.setRoot(StoreNode);
+}
+
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Value *PtrOperand = I.getArgOperand(1);
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
+ MMO, false);
+ DAG.setRoot(StoreNode);
+ setValue(&I, StoreNode);
+}
+
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers
+// Example:
+// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer, it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector, we
+// extract the splat value and use it as the uniform base.
+// In all other cases the function returns 'false'.
+//
+static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+                           SelectionDAGBuilder *SDB) {
+  SelectionDAG &DAG = SDB->DAG;
+ LLVMContext &Context = *DAG.getContext();
+
+  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+ const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP || GEP->getNumOperands() > 2)
+ return false;
+
+ const Value *GEPPtr = GEP->getPointerOperand();
+ if (!GEPPtr->getType()->isVectorTy())
+ Ptr = GEPPtr;
+ else if (!(Ptr = getSplatValue(GEPPtr)))
+ return false;
+
+ Value *IndexVal = GEP->getOperand(1);
+
+ // The operands of the GEP may be defined in another basic block.
+ // In this case we'll not find nodes for the operands.
+ if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+ return false;
+
+ Base = SDB->getValue(Ptr);
+ Index = SDB->getValue(IndexVal);
+
+ // Suppress sign extension.
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+ if (SDB->findValue(Sext->getOperand(0))) {
+ IndexVal = Sext->getOperand(0);
+ Index = SDB->getValue(IndexVal);
+ }
+ }
+ if (!Index.getValueType().isVector()) {
+ unsigned GEPWidth = GEP->getType()->getVectorNumElements();
+ EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
+ SmallVector<SDValue, 16> Ops(GEPWidth, Index);
+ Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ }
+ return true;
+}
+
+void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
+ const Value *Ptr = I.getArgOperand(1);
+ SDValue Src0 = getValue(I.getArgOperand(0));
+ SDValue Mask = getValue(I.getArgOperand(3));
+ EVT VT = Src0.getValueType();
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+
+ SDValue Base;
+ SDValue Index;
+ const Value *BasePtr = Ptr;
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+
+ const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
+ MachineMemOperand::MOStore, VT.getStoreSize(),
+ Alignment, AAInfo);
+ if (!UniformBase) {
+ Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ }
+ SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
+ SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
+ Ops, MMO);
+ DAG.setRoot(Scatter);
+ setValue(&I, Scatter);
+}
+
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Value *PtrOperand = I.getArgOperand(0);
+ SDValue Ptr = getValue(PtrOperand);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ // Do not serialize masked loads of constant memory with anything.
+ bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation(
+ PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
+ ISD::NON_EXTLOAD);
+ if (AddToChain) {
+ SDValue OutChain = Load.getValue(1);
+ DAG.setRoot(OutChain);
+ }
+ setValue(&I, Load);
+}
+
+void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
+ SDLoc sdl = getCurSDLoc();
+
+ // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
+ const Value *Ptr = I.getArgOperand(0);
+ SDValue Src0 = getValue(I.getArgOperand(3));
+ SDValue Mask = getValue(I.getArgOperand(2));
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlignment(VT);
+
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+ SDValue Root = DAG.getRoot();
+ SDValue Base;
+ SDValue Index;
+ const Value *BasePtr = Ptr;
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
+ bool ConstantMemory = false;
+ if (UniformBase &&
+ AA->pointsToConstantMemory(MemoryLocation(
+ BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
+ AAInfo))) {
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ Root = DAG.getEntryNode();
+ ConstantMemory = true;
+ }
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
+ MachineMemOperand::MOLoad, VT.getStoreSize(),
+ Alignment, AAInfo, Ranges);
+
+ if (!UniformBase) {
+ Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(Ptr);
+ }
+ SDValue Ops[] = { Root, Src0, Mask, Base, Index };
+ SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
+ Ops, MMO);
+
+ SDValue OutChain = Gather.getValue(1);
+ if (!ConstantMemory)
+ PendingLoads.push_back(OutChain);
+ setValue(&I, Gather);
+}
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+ SDLoc dl = getCurSDLoc();
+ AtomicOrdering SuccessOrder = I.getSuccessOrdering();
+ AtomicOrdering FailureOrder = I.getFailureOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
+ SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
+ SDValue L = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
+ /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope);
+
+ SDValue OutChain = L.getValue(2);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+ SDLoc dl = getCurSDLoc();
+ ISD::NodeType NT;
+ switch (I.getOperation()) {
+ default: llvm_unreachable("Unknown atomicrmw operation");
+ case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
+ case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
+ case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
+ case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
+ case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
+ case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
+ case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
+ case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
+ case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
+ case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
+ case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ }
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ SDValue L =
+ DAG.getAtomic(NT, dl,
+ getValue(I.getValOperand()).getSimpleValueType(),
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValOperand()),
+ I.getPointerOperand(),
+ /* Alignment=*/ 0, Order, Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+ SDLoc dl = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Ops[3];
+ Ops[0] = getRoot();
+ Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+ SDLoc dl = getCurSDLoc();
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic load");
+
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ MachineMemOperand::MOVolatile |
+ MachineMemOperand::MOLoad,
+ VT.getStoreSize(),
+ I.getAlignment() ? I.getAlignment() :
+ DAG.getEVTAlignment(VT));
+
+ InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
+ SDValue L =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ getValue(I.getPointerOperand()), MMO,
+ Order, Scope);
+
+ SDValue OutChain = L.getValue(1);
+
+ setValue(&I, L);
+ DAG.setRoot(OutChain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+ SDLoc dl = getCurSDLoc();
+
+ AtomicOrdering Order = I.getOrdering();
+ SynchronizationScope Scope = I.getSynchScope();
+
+ SDValue InChain = getRoot();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT =
+ TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+
+ if (I.getAlignment() < VT.getSizeInBits() / 8)
+ report_fatal_error("Cannot generate unaligned atomic store");
+
+ SDValue OutChain =
+ DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
+ InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getValueOperand()),
+ I.getPointerOperand(), I.getAlignment(),
+ Order, Scope);
+
+ DAG.setRoot(OutChain);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ bool HasChain = !I.doesNotAccessMemory();
+ bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+ // Build the operand list.
+ SmallVector<SDValue, 8> Ops;
+ if (HasChain) { // If this intrinsic has side-effects, chainify it.
+ if (OnlyLoad) {
+ // We don't need to serialize loads against other loads.
+ Ops.push_back(DAG.getRoot());
+ } else {
+ Ops.push_back(getRoot());
+ }
+ }
+
+  // Info is set by getTgtMemIntrinsic.
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target intrinsic
+  // that lowers to a custom opcode; generic INTRINSIC_VOID and
+  // INTRINSIC_W_CHAIN nodes still take the intrinsic ID operand.
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Add all operands of the call to the operand list.
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
+ Ops.push_back(Op);
+ }
+
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+
+ if (HasChain)
+ ValueVTs.push_back(MVT::Other);
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ // Create the node.
+ SDValue Result;
+ if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
+ VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.vol,
+ Info.readMem, Info.writeMem, Info.size);
+ } else if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ }
+
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (!I.getType()->isVoidTy()) {
+ if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
+ Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
+ } else
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+
+ setValue(&I, Result);
+ }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+/// Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the i32 bit pattern of the floating-point value.
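+///
+/// For example, for Op = 0x40490FDB (3.14159274f) this yields 0x3FC90FDB,
+/// i.e. the significand 1.57079637f.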
+static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
+ SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x007fffff, dl, MVT::i32));
+ SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+ DAG.getConstant(0x3f800000, dl, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the i32 bit pattern of the floating-point value.
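+///
+/// For example, for Op = 0x40490FDB (3.14159274f) the biased exponent field
+/// is 128, so this returns (float)(128 - 127) = 1.0f; indeed
+/// 3.14159274f = 1.57079637f * 2^1.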
+static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
+ const TargetLowering &TLI, const SDLoc &dl) {
+ SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+ DAG.getConstant(0x7f800000, dl, MVT::i32));
+ SDValue t1 = DAG.getNode(
+ ISD::SRL, dl, MVT::i32, t0,
+ DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+ DAG.getConstant(127, dl, MVT::i32));
+ return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get 32-bit floating point constant.
+static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
+ const SDLoc &dl) {
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
+ MVT::f32);
+}
+
+static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+  // IntegerPartOfX = (int32_t)t0;
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = t0 - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
+ DAG.getDataLayout())));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e, dl));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234, dl));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000, dl));
+ }
+
+ // Add the exponent into the result in integer domain.
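+  // Adding IntegerPartOfX << 23 to the i32 bit pattern of
+  // TwoToFractionalPartOfX bumps its IEEE-754 exponent field by
+  // IntegerPartOfX, i.e. multiplies the value by 2^IntegerPartOfX, giving
+  // 2^t0 = 2^IntegerPartOfX * 2^FractionalPartOfX.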
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
+}
+
+/// expandExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OFe 1.4426950f
+ // t0 = Op * LOG2OFe
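+    //
+    // Since exp(x) = 2^(x * log2(e)), the limited-precision exp2 expansion
+    // below handles the rest.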
+
+ // TODO: What fast-math-flags should be set here?
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+ getF32Constant(DAG, 0x3fb8aa3b, dl));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+}
+
+/// expandLog - Lower a log intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log(2) [0.69314718f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3f317218, dl));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
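+    // Writing Op = X * 2^E with 1 <= X < 2, log(Op) = E * log(2) + log(X);
+    // the polynomials below approximate log(X) over [1,2).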
+ SDValue LogOfMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // LogofMantissa =
+ // -1.1609546f +
+ // (1.4034025f - 0.23903021f * x) * x;
+ //
+ // error 0.0034276066, which is better than 8 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbe74c456, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3fb3a2b1, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f949a29, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // LogOfMantissa =
+ // -1.7417939f +
+ // (2.8212026f +
+ // (-1.4699568f +
+ // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+ //
+ // error 0.000061011436, which is 14 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbd67b6d6, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ee4f4b8, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fbc278b, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40348e95, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fdef31a, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // LogOfMantissa =
+ // -2.1072184f +
+ // (4.2372794f +
+ // (-3.7029485f +
+ // (2.2781945f +
+ // (-0.87823314f +
+ // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+ //
+ // error 0.0000023660568, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbc91e5ac, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e4350aa, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f60d3e3, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x4011cdf0, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x406cfd1c, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x408797cb, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4006dcab, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
+}
+
+/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Get the exponent.
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+    // Different possible minimax approximations of the significand in
+    // floating-point for various degrees of accuracy over [1,2].
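+    // Writing Op = X * 2^E with 1 <= X < 2, log2(Op) = E + log2(X); the
+    // polynomials below approximate log2(X) over [1,2).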
+ SDValue Log2ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+ //
+ // error 0.0049451742, which is more than 7 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbeb08fe0, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x40019463, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fd6633d, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log2ofMantissa =
+ // -2.51285454f +
+ // (4.07009056f +
+ // (-2.12067489f +
+ // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+ //
+ // error 0.0000876136000, which is better than 13 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbda7262e, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f25280b, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x4007b923, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40823e2f, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x4020d29c, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log2ofMantissa =
+ // -3.0400495f +
+ // (6.1129976f +
+ // (-5.3420409f +
+ // (3.2865683f +
+ // (-1.2669343f +
+ // (0.27515199f -
+ // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+ //
+ // error 0.0000018516, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbcd2769e, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e8ce0b9, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3fa22ae7, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x40525723, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x40aaf200, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x40c39dad, dl));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x4042902c, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
+}
+
+/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+
+ // TODO: What fast-math-flags should be set on the floating-point nodes?
+
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+ // Scale the exponent by log10(2) [0.30102999f].
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+ SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ getF32Constant(DAG, 0x3e9a209a, dl));
+
+ // Get the significand and build it into a floating-point number with
+ // exponent of 1.
+ SDValue X = GetSignificand(DAG, Op1, dl);
+
+ SDValue Log10ofMantissa;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // Log10ofMantissa =
+ // -0.50419619f +
+ // (0.60948995f - 0.10380950f * x) * x;
+ //
+ // error 0.0014886165, which is 6 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0xbdd49a13, dl));
+ SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3f1c0789, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f011300, dl));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // Log10ofMantissa =
+ // -0.64831180f +
+ // (0.91751397f +
+ // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+ //
+ // error 0.00019228036, which is better than 12 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3d431f31, dl));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3ea21fb2, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f6ae232, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f25f7c3, dl));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // Log10ofMantissa =
+ // -0.84299375f +
+ // (1.5327582f +
+ // (-1.0688956f +
+ // (0.49102474f +
+ // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+ //
+ // error 0.0000037995730, which is better than 18 bits
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3c5d51ce, dl));
+ SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+ getF32Constant(DAG, 0x3e00685a, dl));
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3efb6798, dl));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f88d192, dl));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3fc4316c, dl));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3f57ce70, dl));
+ }
+
+ return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+}
+
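+// Note (illustrative annotation, not part of the original code): each
+// getF32Constant argument above is the raw IEEE-754 bit pattern of an f32
+// polynomial coefficient. For example, 0x3e9a209a has sign 0, biased exponent
+// 0x7d (i.e. 2^-2) and mantissa 0x1a209a, so its value is
+// (1 + 0x1a209a / 2^23) * 2^-2 ~= 0.30103, which is log10(2) as the comment
+// above notes. A host-side sketch (hypothetical, not part of this file) to
+// decode such a constant:
+//
+//   #include <cstdint>
+//   #include <cstdio>
+//   #include <cstring>
+//   int main() {
+//     uint32_t Bits = 0x3e9a209a;        // bit pattern from the code above
+//     float F;
+//     std::memcpy(&F, &Bits, sizeof F);  // reinterpret as IEEE-754 binary32
+//     std::printf("%.8f\n", F);          // prints ~0.30103, log10(2)
+//   }
+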
+/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ if (Op.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+ return getLimitedPrecisionExp2(Op, dl, DAG);
+
+ // No special expansion.
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+}
+
+/// expandPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode when the base is 10.0f.
+static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ bool IsExp10 = false;
+ if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+ if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
+ APFloat Ten(10.0f);
+ IsExp10 = LHSC->isExactlyValue(Ten);
+ }
+ }
+
+ // TODO: What fast-math-flags should be set on the FMUL node?
+ if (IsExp10) {
+ // Put the exponent in the right bit position for later addition to the
+ // final result:
+ //
+ // #define LOG2OF10 3.3219281f
+ // t0 = RHS * LOG2OF10;
+ SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
+ getF32Constant(DAG, 0x40549a78, dl));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
+ }
+
+ // No special expansion.
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+}
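+
+// Note (illustrative annotation, not part of the original code): the
+// expansion above uses the identity pow(10.0f, x) == exp2(x * log2(10)).
+// The constant 0x40549a78 is the IEEE-754 f32 bit pattern of ~3.3219281,
+// i.e. log2(10), so t0 = RHS * log2(10) feeds the shared
+// getLimitedPrecisionExp2 expansion.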
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG) {
+ // If RHS is a constant, we can expand this out to a multiplication tree;
+ // otherwise we end up lowering to a call to __powidf2 (for example). When
+ // not optimizing for size we always do the full expansion; when optimizing
+ // for size we only expand if it would produce a small number of multiplies.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ // Get the exponent as a positive value.
+ unsigned Val = RHSC->getSExtValue();
+ if ((int)Val < 0) Val = -Val;
+
+ // powi(x, 0) -> 1.0
+ if (Val == 0)
+ return DAG.getConstantFP(1.0, DL, LHS.getValueType());
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (!F->optForSize() ||
+ // If optimizing for size, don't insert too many multiplies.
+ // This inserts up to 5 multiplies.
+ countPopulation(Val) + Log2_32(Val) < 7) {
+ // We use the simple binary decomposition method to generate the multiply
+ // sequence. There are more optimal ways to do this (for example,
+ // powi(x,15) generates one more multiply than it should), but this has
+ // the benefit of being both really simple and much better than a libcall.
+ SDValue Res; // Logically starts equal to 1.0
+ SDValue CurSquare = LHS;
+ // TODO: Intrinsics should have fast-math-flags that propagate to these
+ // nodes.
+ while (Val) {
+ if (Val & 1) {
+ if (Res.getNode())
+ Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
+ else
+ Res = CurSquare; // 1.0*CurSquare.
+ }
+
+ CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+ CurSquare, CurSquare);
+ Val >>= 1;
+ }
+
+ // If the original was negative, invert the result, producing 1/(x*x*x).
+ if (RHSC->getSExtValue() < 0)
+ Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+ DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
+ return Res;
+ }
+ }
+
+ // Otherwise, expand to a libcall.
+ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
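+
+// Note (illustrative annotation, not part of the original code): the loop
+// above is plain binary exponentiation. For powi(x, 13), 13 = 0b1101, so Res
+// accumulates x, then x * x^4, then x * x^4 * x^8 while CurSquare walks
+// x, x^2, x^4, x^8; the opt-for-size gate countPopulation(13) + Log2_32(13)
+// == 3 + 3 == 6 < 7 still permits the expansion. A scalar reference of the
+// same decomposition (hypothetical helper, for intuition only):
+//
+//   double PowiRef(double X, unsigned Val) {
+//     double Res = 1.0, CurSquare = X;
+//     while (Val) {
+//       if (Val & 1) Res *= CurSquare; // fold in this power-of-two of X
+//       CurSquare *= CurSquare;        // X, X^2, X^4, X^8, ...
+//       Val >>= 1;
+//     }
+//     return Res;
+//   }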
+
+// getUnderlyingArgReg - Find underlying register used for a truncated or
+// bitcasted argument.
+static unsigned getUnderlyingArgReg(const SDValue &N) {
+ switch (N.getOpcode()) {
+ case ISD::CopyFromReg:
+ return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::BITCAST:
+ case ISD::AssertZext:
+ case ISD::AssertSext:
+ case ISD::TRUNCATE:
+ return getUnderlyingArgReg(N.getOperand(0));
+ default:
+ return 0;
+ }
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, they will be inserted to the entry BB.
+bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
+ const Value *V, DILocalVariable *Variable, DIExpression *Expr,
+ DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) {
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Ignore inlined function arguments here.
+ //
+ // FIXME: Should we be checking DL->inlinedAt() to determine this?
+ if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
+ return false;
+
+ Optional<MachineOperand> Op;
+ // Some arguments' frame index is recorded during argument lowering.
+ if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
+ Op = MachineOperand::CreateFI(FI);
+
+ if (!Op && N.getNode()) {
+ unsigned Reg = getUnderlyingArgReg(N);
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ if (PR)
+ Reg = PR;
+ }
+ if (Reg)
+ Op = MachineOperand::CreateReg(Reg, false);
+ }
+
+ if (!Op) {
+ // Check if ValueMap has reg number.
+ DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end())
+ Op = MachineOperand::CreateReg(VMI->second, false);
+ }
+
+ if (!Op && N.getNode())
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ Op = MachineOperand::CreateFI(FINode->getIndex());
+
+ if (!Op)
+ return false;
+
+ assert(Variable->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ if (Op->isReg())
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ Op->getReg(), Offset, Variable, Expr));
+ else
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
+ .addOperand(*Op)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
+
+ return true;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
+/// we want to emit this as a call to a named external function, return the
+/// name; otherwise, lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc sdl = getCurSDLoc();
+ DebugLoc dl = getCurDebugLoc();
+ SDValue Res;
+
+ switch (Intrinsic) {
+ default:
+ // By default, turn this into a target intrinsic node.
+ visitTargetIntrinsic(I, Intrinsic);
+ return nullptr;
+ case Intrinsic::vastart: visitVAStart(I); return nullptr;
+ case Intrinsic::vaend: visitVAEnd(I); return nullptr;
+ case Intrinsic::vacopy: visitVACopy(I); return nullptr;
+ case Intrinsic::returnaddress:
+ setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
+ case Intrinsic::frameaddress:
+ setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
+ case Intrinsic::read_register: {
+ Value *Reg = I.getArgOperand(0);
+ SDValue Chain = getRoot();
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ Res = DAG.getNode(ISD::READ_REGISTER, sdl,
+ DAG.getVTList(VT, MVT::Other), Chain, RegName);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return nullptr;
+ }
+ case Intrinsic::write_register: {
+ Value *Reg = I.getArgOperand(0);
+ Value *RegValue = I.getArgOperand(1);
+ SDValue Chain = getRoot();
+ SDValue RegName =
+ DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
+ DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
+ RegName, getValue(RegValue)));
+ return nullptr;
+ }
+ case Intrinsic::setjmp:
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+ case Intrinsic::longjmp:
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+ case Intrinsic::memcpy: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ false, isTC,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MC);
+ return nullptr;
+ }
+ case Intrinsic::memset: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)));
+ updateDAGForMaybeTailCall(MS);
+ return nullptr;
+ }
+ case Intrinsic::memmove: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ if (!Align)
+ Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment.
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+ bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+ SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ isTC, MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ updateDAGForMaybeTailCall(MM);
+ return nullptr;
+ }
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
+ const Value *Address = DI.getAddress();
+ assert(Variable && "Missing variable");
+ if (!Address) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return nullptr;
+ }
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return nullptr;
+ }
+
+ SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
+ SDDbgValue *SDV;
+ if (N.getNode()) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+ auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (isParameter && FINode) {
+ // Byval parameter. We have a frame index at this point.
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
+ FINode->getIndex(), 0, dl, SDNodeOrder);
+ } else if (isa<Argument>(Address)) {
+ // Address is an argument, so try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N);
+ return nullptr;
+ } else {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ true, 0, dl, SDNodeOrder);
+ }
+ DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+ } else {
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false,
+ N)) {
+ // If the variable is pinned by an alloca in a dominating basic block,
+ // use StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return nullptr;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+ return nullptr;
+ }
+ case Intrinsic::dbg_value: {
+ const DbgValueInst &DI = cast<DbgValueInst>(I);
+ assert(DI.getVariable() && "Missing variable");
+
+ DILocalVariable *Variable = DI.getVariable();
+ DIExpression *Expression = DI.getExpression();
+ uint64_t Offset = DI.getOffset();
+ const Value *V = DI.getValue();
+ if (!V)
+ return nullptr;
+
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
+ SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
+ SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ } else {
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
+ false, N)) {
+ SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
+ false, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else if (!V->use_empty()) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
+ // We may expand this to cover more cases. One case where we have no
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ }
+ }
+
+ // Build a debug info table entry.
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+ V = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ // Don't handle byval struct arguments or VLAs, for example.
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
+ return nullptr;
+ }
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return nullptr; // VLAs.
+ return nullptr;
+ }
+
+ case Intrinsic::eh_typeid_for: {
+ // Find the type id for the given typeinfo.
+ GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
+ unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ Res = DAG.getConstant(TypeID, sdl, MVT::i32);
+ setValue(&I, Res);
+ return nullptr;
+ }
+
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
+ MVT::Other,
+ getControlRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::eh_unwind_init:
+ DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ return nullptr;
+ case Intrinsic::eh_dwarf_cfa: {
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
+ TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue Offset = DAG.getNode(ISD::ADD, sdl,
+ CfaArg.getValueType(),
+ DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl,
+ CfaArg.getValueType()),
+ CfaArg);
+ SDValue FA = DAG.getNode(
+ ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()),
+ DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
+ FA, Offset));
+ return nullptr;
+ }
+ case Intrinsic::eh_sjlj_callsite: {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+ assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+ MMI.setCurrentCallSite(CI->getZExtValue());
+ return nullptr;
+ }
+ case Intrinsic::eh_sjlj_functioncontext: {
+ // Get and store the index of the function context.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ AllocaInst *FnCtx =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ int FI = FuncInfo.StaticAllocaMap[FnCtx];
+ MFI->setFunctionContextIndex(FI);
+ return nullptr;
+ }
+ case Intrinsic::eh_sjlj_setjmp: {
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ setValue(&I, Op.getValue(0));
+ DAG.setRoot(Op.getValue(1));
+ return nullptr;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
+ return nullptr;
+ }
+ case Intrinsic::eh_sjlj_setup_dispatch: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
+ getRoot()));
+ return nullptr;
+ }
+
+ case Intrinsic::masked_gather:
+ visitMaskedGather(I);
+ return nullptr;
+ case Intrinsic::masked_load:
+ visitMaskedLoad(I);
+ return nullptr;
+ case Intrinsic::masked_scatter:
+ visitMaskedScatter(I);
+ return nullptr;
+ case Intrinsic::masked_store:
+ visitMaskedStore(I);
+ return nullptr;
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return nullptr;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+ // The vector shift intrinsics with scalars use 32-bit shift amounts, but
+ // the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+ // to zero.
+ // We must do this early because v2i32 is not a legal type.
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
+ DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return nullptr;
+ }
+ case Intrinsic::convertff:
+ case Intrinsic::convertfsi:
+ case Intrinsic::convertfui:
+ case Intrinsic::convertsif:
+ case Intrinsic::convertuif:
+ case Intrinsic::convertss:
+ case Intrinsic::convertsu:
+ case Intrinsic::convertus:
+ case Intrinsic::convertuu: {
+ ISD::CvtCode Code = ISD::CVT_INVALID;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::convertff: Code = ISD::CVT_FF; break;
+ case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+ case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+ case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+ case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+ case Intrinsic::convertss: Code = ISD::CVT_SS; break;
+ case Intrinsic::convertsu: Code = ISD::CVT_SU; break;
+ case Intrinsic::convertus: Code = ISD::CVT_US; break;
+ case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
+ }
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ const Value *Op1 = I.getArgOperand(0);
+ Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1),
+ DAG.getValueType(DestVT),
+ DAG.getValueType(getValue(Op1).getValueType()),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ Code);
+ setValue(&I, Res);
+ return nullptr;
+ }
+ case Intrinsic::powi:
+ setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
+ return nullptr;
+ case Intrinsic::log:
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return nullptr;
+ case Intrinsic::log2:
+ setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return nullptr;
+ case Intrinsic::log10:
+ setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return nullptr;
+ case Intrinsic::exp:
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return nullptr;
+ case Intrinsic::exp2:
+ setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ return nullptr;
+ case Intrinsic::pow:
+ setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG, TLI));
+ return nullptr;
+ case Intrinsic::sqrt:
+ case Intrinsic::fabs:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ case Intrinsic::canonicalize: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::fabs: Opcode = ISD::FABS; break;
+ case Intrinsic::sin: Opcode = ISD::FSIN; break;
+ case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
+ }
+
+ setValue(&I, DAG.getNode(Opcode, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
+ }
+ case Intrinsic::minnum: {
+ auto VT = getValue(I.getArgOperand(0)).getValueType();
+ unsigned Opc =
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
+ ? ISD::FMINNAN
+ : ISD::FMINNUM;
+ setValue(&I, DAG.getNode(Opc, sdl, VT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ }
+ case Intrinsic::maxnum: {
+ auto VT = getValue(I.getArgOperand(0)).getValueType();
+ unsigned Opc =
+ I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
+ ? ISD::FMAXNAN
+ : ISD::FMAXNUM;
+ setValue(&I, DAG.getNode(Opc, sdl, VT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ }
+ case Intrinsic::copysign:
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
+ return nullptr;
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return nullptr;
+ case Intrinsic::fmuladd: {
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT)) {
+ setValue(&I, DAG.getNode(ISD::FMA, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ } else {
+ // TODO: Intrinsic calls should have fast-math-flags.
+ SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
+ SDValue Add = DAG.getNode(ISD::FADD, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ Mul,
+ getValue(I.getArgOperand(2)));
+ setValue(&I, Add);
+ }
+ return nullptr;
+ }
+ case Intrinsic::convert_to_fp16:
+ setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
+ DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant(0, sdl,
+ MVT::i32))));
+ return nullptr;
+ case Intrinsic::convert_from_fp16:
+ setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
+ getValue(I.getArgOperand(0)))));
+ return nullptr;
+ case Intrinsic::pcmarker: {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
+ return nullptr;
+ }
+ case Intrinsic::readcyclecounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
+ DAG.getVTList(MVT::i64, MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return nullptr;
+ }
+ case Intrinsic::bitreverse:
+ setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
+ case Intrinsic::bswap:
+ setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
+ case Intrinsic::cttz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ sdl, Ty, Arg));
+ return nullptr;
+ }
+ case Intrinsic::ctlz: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ sdl, Ty, Arg));
+ return nullptr;
+ }
+ case Intrinsic::ctpop: {
+ SDValue Arg = getValue(I.getArgOperand(0));
+ EVT Ty = Arg.getValueType();
+ setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
+ return nullptr;
+ }
+ case Intrinsic::stacksave: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(
+ ISD::STACKSAVE, sdl,
+ DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return nullptr;
+ }
+ case Intrinsic::stackrestore: {
+ Res = getValue(I.getArgOperand(0));
+ DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
+ return nullptr;
+ }
+ case Intrinsic::get_dynamic_area_offset: {
+ SDValue Op = getRoot();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ // The result type of @llvm.get.dynamic.area.offset must match the
+ // target's pointer type.
+ if (PtrTy != ResTy)
+ report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
+ " intrinsic!");
+ Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
+ Op);
+ DAG.setRoot(Op);
+ setValue(&I, Res);
+ return nullptr;
+ }
+ case Intrinsic::stackguard: {
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Module &M = *MF.getFunction()->getParent();
+ SDValue Chain = getRoot();
+ if (TLI.useLoadStackGuardNode()) {
+ Res = getLoadStackGuard(DAG, sdl, Chain);
+ } else {
+ const Value *Global = TLI.getSDagStackGuard(M);
+ unsigned Align = DL->getPrefTypeAlignment(Global->getType());
+ Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
+ MachinePointerInfo(Global, 0), Align,
+ MachineMemOperand::MOVolatile);
+ }
+ DAG.setRoot(Chain);
+ setValue(&I, Res);
+ return nullptr;
+ }
+ case Intrinsic::stackprotector: {
+ // Emit code into the DAG to store the stack guard onto the stack.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue Src, Chain = getRoot();
+
+ if (TLI.useLoadStackGuardNode())
+ Src = getLoadStackGuard(DAG, sdl, Chain);
+ else
+ Src = getValue(I.getArgOperand(0)); // The guard's value.
+
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MFI->setStackProtectorIndex(FI);
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+ // Store the stack protector onto the stack.
+ Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI),
+ /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+ setValue(&I, Res);
+ DAG.setRoot(Res);
+ return nullptr;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+
+ assert(CI && "Non-constant type in __builtin_object_size?");
+
+ SDValue Arg = getValue(I.getCalledValue());
+ EVT Ty = Arg.getValueType();
+
+ if (CI->isZero())
+ Res = DAG.getConstant(-1ULL, sdl, Ty);
+ else
+ Res = DAG.getConstant(0, sdl, Ty);
+
+ setValue(&I, Res);
+ return nullptr;
+ }
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Drop the intrinsic, but forward the value
+ setValue(&I, getValue(I.getOperand(0)));
+ return nullptr;
+ case Intrinsic::assume:
+ case Intrinsic::var_annotation:
+ // Discard annotate attributes and assumptions
+ return nullptr;
+
+ case Intrinsic::init_trampoline: {
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+ SDValue Ops[6];
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+ Ops[5] = DAG.getSrcValue(F);
+
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
+
+ DAG.setRoot(Res);
+ return nullptr;
+ }
+ case Intrinsic::adjust_trampoline: {
+ setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
+ TLI.getPointerTy(DAG.getDataLayout()),
+ getValue(I.getArgOperand(0))));
+ return nullptr;
+ }
+ case Intrinsic::gcroot: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+ (void)F;
+ assert(F->hasGC() &&
+ "only valid in functions with gc specified, enforced by Verifier");
+ assert(GFI && "implied by previous");
+ const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+ FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+ GFI->addStackRoot(FI->getIndex(), TypeMap);
+ return nullptr;
+ }
+ case Intrinsic::gcread:
+ case Intrinsic::gcwrite:
+ llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+ case Intrinsic::flt_rounds:
+ setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
+ return nullptr;
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with EXP.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return nullptr;
+ }
+
+ case Intrinsic::debugtrap:
+ case Intrinsic::trap: {
+ StringRef TrapFuncName =
+ I.getAttributes()
+ .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+ .getValueAsString();
+ if (TrapFuncName.empty()) {
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+ ISD::TRAP : ISD::DEBUGTRAP;
+ DAG.setRoot(DAG.getNode(Op, sdl, MVT::Other, getRoot()));
+ return nullptr;
+ }
+ TargetLowering::ArgListTy Args;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+ CallingConv::C, I.getType(),
+ DAG.getExternalSymbol(TrapFuncName.data(),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+ DAG.setRoot(Result.second);
+ return nullptr;
+ }
+
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
+ return nullptr;
+ }
+ case Intrinsic::prefetch: {
+ SDValue Ops[5];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Ops[0] = getRoot();
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
+ DAG.getVTList(MVT::Other), Ops,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
+ return nullptr;
+ }
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end: {
+ bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+ // Stack coloring is not enabled at -O0; discard region information.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return nullptr;
+
+ SmallVector<Value *, 4> Allocas;
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
+
+ for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
+ E = Allocas.end(); Object != E; ++Object) {
+ AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+
+ // Could not find an Alloca.
+ if (!LifetimeObject)
+ continue;
+
+ // First check that the Alloca is static; otherwise it won't have a
+ // valid frame index.
+ auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
+ if (SI == FuncInfo.StaticAllocaMap.end())
+ return nullptr;
+
+ int FI = SI->second;
+
+ SDValue Ops[2];
+ Ops[0] = getRoot();
+ Ops[1] =
+ DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true);
+ unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+ Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
+ DAG.setRoot(Res);
+ }
+ return nullptr;
+ }
+ case Intrinsic::invariant_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
+ return nullptr;
+ case Intrinsic::invariant_end:
+ // Discard region information.
+ return nullptr;
+ case Intrinsic::clear_cache:
+ return TLI.getClearCacheBuiltinName();
+ case Intrinsic::donothing:
+ // ignore
+ return nullptr;
+ case Intrinsic::experimental_stackmap: {
+ visitStackmap(I);
+ return nullptr;
+ }
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64: {
+ visitPatchpoint(&I);
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_statepoint: {
+ LowerStatepoint(ImmutableStatepoint(&I));
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_result: {
+ visitGCResult(cast<GCResultInst>(I));
+ return nullptr;
+ }
+ case Intrinsic::experimental_gc_relocate: {
+ visitGCRelocate(cast<GCRelocateInst>(I));
+ return nullptr;
+ }
+ case Intrinsic::instrprof_increment:
+ llvm_unreachable("instrprof failed to lower an increment");
+ case Intrinsic::instrprof_value_profile:
+ llvm_unreachable("instrprof failed to lower a value profiling call");
+ case Intrinsic::localescape: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+ AllocaInst *Slot = cast<AllocaInst>(Arg);
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only escape static allocas");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(MF.getName()), Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::LOCAL_ESCAPE))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+ }
+
+ return nullptr;
+ }
+
+ case Intrinsic::localrecover: {
+ // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
+
+ // Get the symbol that defines the frame offset.
+ auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
+ unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
+ GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
+
+ // Create an MCSymbol for the label to avoid any target lowering
+ // that would make this PC-relative.
+ SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
+ SDValue OffsetVal =
+ DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
+
+ // Add the offset to the FP.
+ Value *FP = I.getArgOperand(1);
+ SDValue FPVal = getValue(FP);
+ SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+ setValue(&I, Add);
+
+ return nullptr;
+ }
+
+ case Intrinsic::eh_exceptionpointer:
+ case Intrinsic::eh_exceptioncode: {
+ // Get the exception pointer vreg, copy from it, and resize it to fit.
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
+ unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ SDValue N =
+ DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+ if (Intrinsic == Intrinsic::eh_exceptioncode)
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ setValue(&I, N);
+ return nullptr;
+ }
+
+ case Intrinsic::experimental_deoptimize:
+ LowerDeoptimizeCall(&I);
+ return nullptr;
+ }
+}
+
+std::pair<SDValue, SDValue>
+SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB) {
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MCSymbol *BeginLabel = nullptr;
+
+ if (EHPadBB) {
+ // Insert a label before the invoke call to mark the try range. This can be
+ // used to detect deletion of the invoke via the MachineModuleInfo.
+ BeginLabel = MMI.getContext().createTempSymbol();
+
+ // For SjLj, keep track of which landing pads go with which invokes
+ // so as to maintain the ordering of pads in the LSDA.
+ unsigned CallSiteIndex = MMI.getCurrentCallSite();
+ if (CallSiteIndex) {
+ MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
+
+ // Now that the call site is handled, stop tracking it.
+ MMI.setCurrentCallSite(0);
+ }
+
+ // Both PendingLoads and PendingExports must be flushed here;
+ // this call might not return.
+ (void)getRoot();
+ DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
+
+ CLI.setChain(getRoot());
+ }
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+
+ assert((CLI.IsTailCall || Result.second.getNode()) &&
+ "Non-null chain expected with non-tail call!");
+ assert((Result.second.getNode() || !Result.first.getNode()) &&
+ "Null value expected with tail call!");
+
+ if (!Result.second.getNode()) {
+ // As a special case, a null chain means that a tail call has been emitted
+ // and the DAG root is already updated.
+ HasTailCall = true;
+
+ // Since there's no actual continuation from this block, nothing can be
+ // relying on us setting vregs for them.
+ PendingExports.clear();
+ } else {
+ DAG.setRoot(Result.second);
+ }
+
+ if (EHPadBB) {
+ // Insert a label at the end of the invoke call to mark the try range. This
+ // can be used to detect deletion of the invoke via the MachineModuleInfo.
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
+
+ // Inform MachineModuleInfo of range.
+ if (MMI.hasEHFunclets()) {
+ assert(CLI.CS);
+ WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+ BeginLabel, EndLabel);
+ } else {
+ MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ }
+ }
+
+ return Result;
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+ bool isTailCall,
+ const BasicBlock *EHPadBB) {
+ auto &DL = DAG.getDataLayout();
+ FunctionType *FTy = CS.getFunctionType();
+ Type *RetTy = CS.getType();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Args.reserve(CS.arg_size());
+
+ const Value *SwiftErrorVal = nullptr;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ const Value *V = *i;
+
+ // Skip empty types
+ if (V->getType()->isEmptyTy())
+ continue;
+
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode; Entry.Ty = V->getType();
+
+ // Skip the first return-type Attribute to get to params.
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+
+ // Use swifterror virtual register as input to the call.
+ if (Entry.isSwiftError && TLI.supportSwiftError()) {
+ SwiftErrorVal = V;
+ // Find the virtual register for the actual swifterror argument and use
+ // it in the call instead of the Value.
+ Entry.Node = DAG.getRegister(
+ FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
+ }
+
+ Args.push_back(Entry);
+
+ // If we have an explicit sret argument that is an Instruction (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (Entry.isSRet && isa<Instruction>(V))
+ isTailCall = false;
+ }
+
+ // Check if target-independent constraints permit a tail call here.
+ // Target-dependent constraints are checked within TLI->LowerCallTo.
+ if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
+ isTailCall = false;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
+ .setTailCall(isTailCall)
+ .setConvergent(CS.isConvergent());
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
+
+ if (Result.first.getNode()) {
+ const Instruction *Inst = CS.getInstruction();
+ Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
+ setValue(Inst, Result.first);
+ }
+
+ // The last element of CLI.InVals has the SDValue for swifterror return.
+ // Here we copy it to a virtual register and update SwiftErrorMap for
+ // book-keeping.
+ if (SwiftErrorVal && TLI.supportSwiftError()) {
+ // Get the last element of InVals.
+ SDValue Src = CLI.InVals.back();
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
+ unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
+ SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
+ // We update the virtual register for the actual swifterror argument.
+ FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
+ DAG.setRoot(CopyNode);
+ }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+ for (const User *U : V->users()) {
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
+ if (IC->isEquality())
+ if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+ if (C->isNullValue())
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
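+
+// Note (illustrative annotation, not part of the original code): the check
+// above accepts a value whose every user is an equality compare against a
+// null constant, e.g. (hypothetical IR)
+//
+//   %r = call i32 @memcmp(i8* %p, i8* %q, i64 4)
+//   %z = icmp eq i32 %r, 0   ; accepted
+//
+// whereas any other user (an add, a store, an ordered compare) makes it
+// return false.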
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+ Type *LoadTy,
+ SelectionDAGBuilder &Builder) {
+
+ // Check to see if this load can be trivially constant folded, e.g. if the
+ // input is from a string literal.
+ if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+ // Cast pointer to the type we really want to load.
+ LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+ PointerType::getUnqual(LoadTy));
+
+ if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
+ const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
+ return Builder.getValue(LoadCst);
+ }
+
+ // Otherwise, we have to emit the load. If the pointer is to unfoldable but
+ // still constant memory, the input chain can be the entry node.
+ SDValue Root;
+ bool ConstantMemory = false;
+
+ // Do not serialize (non-volatile) loads of constant memory with anything.
+ if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ Root = Builder.DAG.getEntryNode();
+ ConstantMemory = true;
+ } else {
+ // Do not serialize non-volatile loads against each other.
+ Root = Builder.DAG.getRoot();
+ }
+
+ SDValue Ptr = Builder.getValue(PtrVal);
+ SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
+ Ptr, MachinePointerInfo(PtrVal),
+ /* Alignment = */ 1);
+
+ if (!ConstantMemory)
+ Builder.PendingLoads.push_back(LoadVal.getValue(1));
+ return LoadVal;
+}
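+
+// Note (illustrative annotation, not part of the original code): when the
+// pointer is itself a Constant (e.g. the string-literal side of a call such
+// as memcmp(p, "ab", 2)), ConstantFoldLoadFromConstPtr folds the load to a
+// constant and no load node is emitted; otherwise a plain align-1 load is
+// built, chained to the entry node when AA proves the memory is constant so
+// it does not serialize with other loads.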
+
+/// processIntegerCallValue - Record the value for an instruction that
+/// produces an integer result, converting the type where necessary.
+void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
+ SDValue Value,
+ bool IsSigned) {
+ EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
+ if (IsSigned)
+ Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
+ else
+ Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
+ setValue(&I, Value);
+}
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const Value *Size = I.getArgOperand(2);
+ const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
+ if (CSize && CSize->getZExtValue() == 0) {
+ EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ I.getType(), true);
+ setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
+ return true;
+ }
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(LHS), getValue(RHS), getValue(Size),
+ MachinePointerInfo(LHS),
+ MachinePointerInfo(RHS));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
+ // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
+ if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ Type *LoadTy;
+ switch (CSize->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = nullptr;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(CSize->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(CSize->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(CSize->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(CSize->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (ActuallyDoIt && CSize->getZExtValue() > 4) {
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+ // TODO: Check alignment of src and dest ptrs.
+ if (!TLI.isTypeLegal(LoadVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
+ ActuallyDoIt = false;
+ }
+
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ processIntegerCallValue(I, Res, false);
+ return true;
+ }
+ }
+
+ return false;
+}
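+
+// Note (illustrative annotation, not part of the original code): with a
+// constant size whose result is only tested for equality against zero, e.g.
+//
+//   if (memcmp(p, q, 4) == 0) { ... }
+//
+// the call is replaced by two loads of the matching width and a SETNE; for
+// the 8-byte case this additionally requires that MVT::i64 be legal and that
+// the target allow misaligned loads of that type.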
+
+/// visitMemChrCall -- See if we can lower a memchr call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+ // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *Src = I.getArgOperand(0);
+ const Value *Char = I.getArgOperand(1);
+ const Value *Length = I.getArgOperand(2);
+ if (!Src->getType()->isPointerTy() ||
+ !Char->getType()->isIntegerTy() ||
+ !Length->getType()->isIntegerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Src), getValue(Char), getValue(Length),
+ MachinePointerInfo(Src));
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
+/// optimized form. If so, return true and lower it, otherwise return false
+/// and it will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
+ // Verify that the prototype makes sense. char *strcpy(char *, char *)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isPointerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1), isStpcpy);
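+ // Unlike the read-only string routines handled here, strcpy/stpcpy write
+ // memory, so the returned chain becomes the new DAG root instead of being
+ // queued as a pending load.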
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ DAG.setRoot(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
+/// If so, return true and lower it, otherwise return false and it will be
+/// lowered like a normal call.
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
+ // Verify that the prototype makes sense. int strcmp(char*,char*)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isPointerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0),
+ MachinePointerInfo(Arg1));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, true);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrLenCall -- See if we can lower a strlen call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
+ // Verify that the prototype makes sense. size_t strlen(char *)
+ if (I.getNumArgOperands() != 1)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0);
+ if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
+ return false;
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
+ // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
+ if (I.getNumArgOperands() != 2)
+ return false;
+
+ const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
+ if (!Arg0->getType()->isPointerTy() ||
+ !Arg1->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
+ return false;
+
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Arg0), getValue(Arg1),
+ MachinePointerInfo(Arg0));
+ if (Res.first.getNode()) {
+ processIntegerCallValue(I, Res.first, false);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a unary floating-point call.
+ if (I.getNumArgOperands() != 1 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
+ return true;
+}
+
+/// visitBinaryFloatCall - If a call instruction is a binary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
+ unsigned Opcode) {
+ // Sanity check that it really is a binary floating-point call.
+ if (I.getNumArgOperands() != 2 ||
+ !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+ I.getType() != I.getArgOperand(0)->getType() ||
+ I.getType() != I.getArgOperand(1)->getType() ||
+ !I.onlyReadsMemory())
+ return false;
+
+ SDValue Tmp0 = getValue(I.getArgOperand(0));
+ SDValue Tmp1 = getValue(I.getArgOperand(1));
+ EVT VT = Tmp0.getValueType();
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
+ return true;
+}
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ ComputeUsesVAFloatArgument(I, &MMI);
+
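+ // visitIntrinsicCall returns null when it has completely lowered the
+ // intrinsic; otherwise it returns the name of a library call to emit in its
+ // place, which is materialized as an external symbol below.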
+ const char *RenameFn = nullptr;
+ if (Function *F = I.getCalledFunction()) {
+ if (F->isDeclaration()) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
+ if (unsigned IID = II->getIntrinsicID(F)) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+ if (Intrinsic::ID IID = F->getIntrinsicID()) {
+ RenameFn = visitIntrinsicCall(I, IID);
+ if (!RenameFn)
+ return;
+ }
+ }
+
+ // Check for well-known libc/libm calls. If the function is internal, it
+ // can't be a library call. Don't do the check if marked as nobuiltin for
+ // some reason.
+ LibFunc::Func Func;
+ if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ switch (Func) {
+ default: break;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ case LibFunc::copysignl:
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
+ LHS.getValueType(), LHS, RHS));
+ return;
+ }
+ break;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ if (visitUnaryFloatCall(I, ISD::FABS))
+ return;
+ break;
+ case LibFunc::fmin:
+ case LibFunc::fminf:
+ case LibFunc::fminl:
+ if (visitBinaryFloatCall(I, ISD::FMINNUM))
+ return;
+ break;
+ case LibFunc::fmax:
+ case LibFunc::fmaxf:
+ case LibFunc::fmaxl:
+ if (visitBinaryFloatCall(I, ISD::FMAXNUM))
+ return;
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ if (visitUnaryFloatCall(I, ISD::FSIN))
+ return;
+ break;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ if (visitUnaryFloatCall(I, ISD::FCOS))
+ return;
+ break;
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ case LibFunc::sqrt_finite:
+ case LibFunc::sqrtf_finite:
+ case LibFunc::sqrtl_finite:
+ if (visitUnaryFloatCall(I, ISD::FSQRT))
+ return;
+ break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ if (visitUnaryFloatCall(I, ISD::FFLOOR))
+ return;
+ break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
+ return;
+ break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ if (visitUnaryFloatCall(I, ISD::FCEIL))
+ return;
+ break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ if (visitUnaryFloatCall(I, ISD::FRINT))
+ return;
+ break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ if (visitUnaryFloatCall(I, ISD::FROUND))
+ return;
+ break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ if (visitUnaryFloatCall(I, ISD::FTRUNC))
+ return;
+ break;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ if (visitUnaryFloatCall(I, ISD::FLOG2))
+ return;
+ break;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ if (visitUnaryFloatCall(I, ISD::FEXP2))
+ return;
+ break;
+ case LibFunc::memcmp:
+ if (visitMemCmpCall(I))
+ return;
+ break;
+ case LibFunc::memchr:
+ if (visitMemChrCall(I))
+ return;
+ break;
+ case LibFunc::strcpy:
+ if (visitStrCpyCall(I, false))
+ return;
+ break;
+ case LibFunc::stpcpy:
+ if (visitStrCpyCall(I, true))
+ return;
+ break;
+ case LibFunc::strcmp:
+ if (visitStrCmpCall(I))
+ return;
+ break;
+ case LibFunc::strlen:
+ if (visitStrLenCall(I))
+ return;
+ break;
+ case LibFunc::strnlen:
+ if (visitStrNLenCall(I))
+ return;
+ break;
+ }
+ }
+ }
+
+ SDValue Callee;
+ if (!RenameFn)
+ Callee = getValue(I.getCalledValue());
+ else
+ Callee = DAG.getExternalSymbol(
+ RenameFn,
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower calls with arbitrary operand bundles!");
+
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
+ else
+ // Check if we can potentially perform a tail call. More detailed checking
+ // is done within LowerCallTo, after more information about the call is
+ // known.
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+ /// CallOperand - If this is the result output operand or a clobber
+ /// this is null, otherwise it is the incoming operand to the CallInst.
+ /// This gets modified as the asm is processed.
+ SDValue CallOperand;
+
+ /// AssignedRegs - If this is a register or register class operand, this
+ /// contains the set of registers corresponding to the operand.
+ RegsForValue AssignedRegs;
+
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+ : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
+ }
+
+ /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+ /// corresponds to. If there is no Value* for this operand, it returns
+ /// MVT::Other.
+ EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL) const {
+ if (!CallOperandVal) return MVT::Other;
+
+ if (isa<BasicBlock>(CallOperandVal))
+ return TLI.getPointerTy(DL);
+
+ llvm::Type *OpTy = CallOperandVal->getType();
+
+ // FIXME: code duplicated from TargetLowering::ParseConstraints().
+ // If this is an indirect operand, the operand is a pointer to the
+ // accessed type.
+ if (isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpTy = IntegerType::get(Context, BitSize);
+ break;
+ }
+ }
+
+ return TLI.getValueType(DL, OpTy, true);
+ }
+};
+
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
+} // end anonymous namespace
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+/// OpInfo describes the operand.
+///
+static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
+ const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo) {
+ LLVMContext &Context = *DAG.getContext();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<unsigned, 4> Regs;
+
+ // If this is a constraint for a single physreg, or a constraint for a
+ // register class, find it.
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
+ OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
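+ // PhysReg.first names a specific physical register (or is zero), and
+ // PhysReg.second is the register class to allocate from, if any.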
+
+ unsigned NumRegs = 1;
+ if (OpInfo.ConstraintVT != MVT::Other) {
+ // If this is an FP input in an integer register (or vice versa), insert a bit
+ // cast of the input value. More generally, handle any case where the input
+ // value disagrees with the register class we plan to stick this in.
+ if (OpInfo.Type == InlineAsm::isInput &&
+ PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ // Try to convert to the first EVT that the reg class contains. If the
+ // types are identical size, use a bitcast to convert (e.g. two differing
+ // vector types).
+ MVT RegVT = *PhysReg.second->vt_begin();
+ if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+ // If the input is an FP value and we want it in integer registers, do a
+ // bitcast to the corresponding integer type. This turns an f64 value
+ // into i64, which can be passed with two i32 values on a 32-bit
+ // machine.
+ RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
+ RegVT, OpInfo.CallOperand);
+ OpInfo.ConstraintVT = RegVT;
+ }
+ }
+
+ NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+ }
+
+ MVT RegVT;
+ EVT ValueVT = OpInfo.ConstraintVT;
+
+ // If this is a constraint for a specific physical register, like {r17},
+ // assign it now.
+ if (unsigned AssignedReg = PhysReg.first) {
+ const TargetRegisterClass *RC = PhysReg.second;
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = *RC->vt_begin();
+
+ // Get the actual register value type. This is important, because the user
+ // may have asked for (e.g.) the AX register in i32 type. We need to
+ // remember that AX is actually i16 to get the right extension.
+ RegVT = *RC->vt_begin();
+
+ // This is an explicit reference to a physical register.
+ Regs.push_back(AssignedReg);
+
+ // If this is an expanded reference, add the rest of the regs to Regs.
+ if (NumRegs != 1) {
+ TargetRegisterClass::iterator I = RC->begin();
+ for (; *I != AssignedReg; ++I)
+ assert(I != RC->end() && "Didn't find reg!");
+
+ // Already added the first reg.
+ --NumRegs; ++I;
+ for (; NumRegs; --NumRegs, ++I) {
+ assert(I != RC->end() && "Ran out of registers to allocate!");
+ Regs.push_back(*I);
+ }
+ }
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, if this was a reference to an LLVM register class, create vregs
+ // for this reference.
+ if (const TargetRegisterClass *RC = PhysReg.second) {
+ RegVT = *RC->vt_begin();
+ if (OpInfo.ConstraintVT == MVT::Other)
+ ValueVT = RegVT;
+
+ // Create the appropriate number of virtual registers.
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ for (; NumRegs; --NumRegs)
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return;
+ }
+
+ // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+ /// ConstraintOperands - Information about all of the constraints.
+ SDISelAsmOperandInfoVector ConstraintOperands;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+ DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
+
+ bool hasMemory = false;
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
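+ // First pass over the constraints: compute the value type of each operand
+ // and record whether any constraint touches memory.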
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ MVT OpVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
+ STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ // If this is an input or an indirect output, process the call argument.
+ // BasicBlocks are labels, currently appearing only in asm's.
+ if (OpInfo.CallOperandVal) {
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+ }
+
+ OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
+ DAG.getDataLayout()).getSimpleVT();
+ }
+
+ OpInfo.ConstraintVT = OpVT;
+
+ // Indirect operands access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType
+ CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
+ }
+
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
+ // Second pass over the constraints: compute which constraint option to use
+ // and assign registers to constraints that want a specific physreg.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ Input.ConstraintVT = OpInfo.ConstraintVT;
+ }
+ }
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is a memory input, and if the operand is not indirect, do what
+ // we need to in order to provide an address for the memory input.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ !OpInfo.isIndirect) {
+ assert((OpInfo.isMultipleAlternative ||
+ (OpInfo.Type == InlineAsm::isInput)) &&
+ "Can only indirectify direct input operands!");
+
+ // Memory operands really want the address of the value. If we don't have
+ // an indirect input, put it in the constpool if we can, otherwise spill
+ // it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(
+ cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
+ } else {
+ // Otherwise, create a stack slot and emit a store to it before the
+ // asm.
+ Type *Ty = OpVal->getType();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align = DL.getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ SDValue StackSlot =
+ DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
+ Chain = DAG.getStore(
+ Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
+ OpInfo.CallOperand = StackSlot;
+ }
+
+ // There is no longer a Value* corresponding to this operand.
+ OpInfo.CallOperandVal = nullptr;
+
+ // It is now an indirect operand.
+ OpInfo.isIndirect = true;
+ }
+
+ // If this constraint is for a specific register, allocate it before
+ // anything else.
+ if (OpInfo.ConstraintType == TargetLowering::C_Register)
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
+ }
+
+ // Third pass - Loop over all of the operands, assigning virtual or physregs
+ // to register class operands.
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ // C_Register operands have already been allocated, Other/Memory don't need
+ // to be.
+ if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+ GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
+ }
+
+ // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+ std::vector<SDValue> AsmNodeOperands;
+ AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
+ AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
+ IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
+
+ // If we have a !srcloc metadata node associated with it, we want to attach
+ // this to the ultimately generated inline asm machineinstr. To do this, we
+ // pass in the third operand as this (potentially null) inline asm MDNode.
+ const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+ AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ // bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ if (CS.isConvergent())
+ ExtraInfo |= InlineAsm::Extra_IsConvergent;
+ // Set the asm dialect.
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an 'other' constraint can be target-specific and we can't
+ // easily reason about it. Therefore, be conservative and set
+ // MayLoad/MayStore for 'other' constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ ExtraInfo |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ ExtraInfo |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+
+ // Loop over all of the inputs, copying the operand values into the
+ // appropriate registers and processing the output regs.
+ RegsForValue RetValRegs;
+
+ // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+ std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+ SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput: {
+ if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+ OpInfo.ConstraintType != TargetLowering::C_Register) {
+ // Memory output, or 'other' output (e.g. 'X' constraint).
+ assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM node to know about this output.
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
+ MVT::i32));
+ AsmNodeOperands.push_back(OpInfo.CallOperand);
+ break;
+ }
+
+ // Otherwise, this is a register or register class output.
+
+ // Copy the output from the appropriate register. Find a register that
+ // we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(
+ CS, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // If this is an indirect operand, store through the pointer after the
+ // asm.
+ if (OpInfo.isIndirect) {
+ IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+ OpInfo.CallOperandVal));
+ } else {
+ // This is the result value of the call.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ // Concatenate this output onto the outputs list.
+ RetValRegs.append(OpInfo.AssignedRegs);
+ }
+
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs
+ .AddInlineAsmOperands(OpInfo.isEarlyClobber
+ ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isInput: {
+ SDValue InOperandVal = OpInfo.CallOperand;
+
+ if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ // If this is required to match an output register we have already set,
+ // just use its register.
+ unsigned OperandNo = OpInfo.getMatchedOperand();
+
+ // Scan until we find the definition we already emitted of this operand.
+ // When we find it, create a RegsForValue operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+ }
+
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ if (InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+ // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+ if (OpInfo.isIndirect) {
+ // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+ emitInlineAsmError(CS, "inline asm not supported yet:"
+ " don't know how to handle tied "
+ "indirect register inputs");
+ return;
+ }
+
+ RegsForValue MatchedRegs;
+ MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+ MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
+ MatchedRegs.RegVTs.push_back(RegVT);
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+ i != e; ++i) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
+ MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
+ else {
+ emitInlineAsmError(
+ CS, "inline asm error: This value"
+ " type register class is not natively supported!");
+ return;
+ }
+ }
+ SDLoc dl = getCurSDLoc();
+ // Use the produced MatchedRegs object to copy the input value into the
+ // matched registers.
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
+ Chain, &Flag, CS.getInstruction());
+ MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+ true, OpInfo.getMatchedOperand(), dl,
+ DAG, AsmNodeOperands);
+ break;
+ }
+
+ assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+ assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+ "Unexpected number of operands");
+ // Add information to the INLINEASM node to know about this input.
+ // See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
+ OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+ OpInfo.getMatchedOperand());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+ break;
+ }
+
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ std::vector<SDValue> Ops;
+ TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
+ Ops, DAG);
+ if (Ops.empty()) {
+ emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+ AsmNodeOperands.push_back(DAG.getTargetConstant(
+ ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ break;
+ }
+
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+ assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ assert(InOperandVal.getValueType() ==
+ TLI.getPointerTy(DAG.getDataLayout()) &&
+ "Memory operands expect pointer values");
+
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
+ // Add information to the INLINEASM node to know about this input.
+ unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+ getCurSDLoc(),
+ MVT::i32));
+ AsmNodeOperands.push_back(InOperandVal);
+ break;
+ }
+
+ assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ "Unknown constraint type!");
+
+ // TODO: Support this.
+ if (OpInfo.isIndirect) {
+ emitInlineAsmError(
+ CS, "Don't know how to handle indirect register inputs yet "
+ "for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ // Copy the input into the appropriate registers.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
+ SDLoc dl = getCurSDLoc();
+
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
+ Chain, &Flag, CS.getInstruction());
+
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+ dl, DAG, AsmNodeOperands);
+ break;
+ }
+ case InlineAsm::isClobber: {
+ // Add the clobbered value to the operand list, so that the register
+ // allocator is aware that the physreg got clobbered.
+ if (!OpInfo.AssignedRegs.Regs.empty())
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
+ false, 0, getCurSDLoc(), DAG,
+ AsmNodeOperands);
+ break;
+ }
+ }
+ }
+
+ // Finish up input operands. Set the input chain and add the flag last.
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+ Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
+ DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ Flag = Chain.getValue(1);
+
+ // If this asm returns a register value, copy the result from that register
+ // and set it as the value of the call.
+ if (!RetValRegs.Regs.empty()) {
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, &Flag, CS.getInstruction());
+
+ // FIXME: Why don't we do this for inline asms with MRVs?
+ if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+ EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType());
+
+ // If any of the results of the inline asm is a vector, it may have the
+ // wrong width/num elts. This can happen for register classes that can
+ // contain multiple different value types. The preg or vreg allocated may
+ // not have the same VT as was expected. Convert it to the right type
+ // with bit_convert.
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+ Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(),
+ ResultType, Val);
+
+ } else if (ResultType != Val.getValueType() &&
+ ResultType.isInteger() && Val.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result may
+ // have a wider width than the expected result. Extract the relevant
+ // portion.
+ Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val);
+ }
+
+ assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+ }
+
+ setValue(CS.getInstruction(), Val);
+ // Don't need to use this as a chain in this case.
+ if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+ return;
+ }
+
+ std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+ // Process indirect outputs, first output all of the flagged copies out of
+ // physregs.
+ for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+ RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+ const Value *Ptr = IndirectStoresToEmit[i].second;
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, &Flag, IA);
+ StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ }
+
+ // Emit the non-flagged stores from the physregs.
+ SmallVector<SDValue, 8> OutChains;
+ for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+ SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
+ getValue(StoresToEmit[i].second),
+ MachinePointerInfo(StoresToEmit[i].second));
+ OutChains.push_back(Val);
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
+
+ DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
+ const Twine &Message) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), Message);
+
+ // Make sure we leave the DAG in a valid state
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType());
+ setValue(CS.getInstruction(), DAG.getUNDEF(VT));
+}
+
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
+ DAG.getSrcValue(I.getOperand(0)),
+ DL.getABITypeAlignment(I.getType()));
+ setValue(&I, V);
+ DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+ DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
+ MVT::Other, getRoot(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
+ const Instruction &I,
+ SDValue Op) {
+ const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
+ if (!Range)
+ return Op;
+
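+ // This lowering only handles ranges that start at zero, which map directly
+ // to an AssertZext on the low bits; give up on anything else.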
+ Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
+ if (!Lo->isNullValue())
+ return Op;
+
+ Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();
+ unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2();
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
+
+ SDLoc SL = getCurSDLoc();
+
+ SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
+ Op, DAG.getValueType(SmallVT));
+ unsigned NumVals = Op.getNode()->getNumValues();
+ if (NumVals == 1)
+ return ZExt;
+
+ SmallVector<SDValue, 4> Ops;
+
+ Ops.push_back(ZExt);
+ for (unsigned I = 1; I != NumVals; ++I)
+ Ops.push_back(Op.getValue(I));
+
+ return DAG.getMergeValues(Ops, SL);
+}
+
+/// \brief Populate a CallLoweringInfo (into \p CLI) based on the properties of
+/// the call being lowered.
+///
+/// This is a helper for lowering intrinsics that follow a target calling
+/// convention or require stack pointer adjustment. Only a subset of the
+/// intrinsic's operands need to participate in the calling convention.
+void SelectionDAGBuilder::populateCallLoweringInfo(
+ TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
+ unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
+ bool IsPatchPoint) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ const Value *V = CS->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setDiscardResult(CS->use_empty())
+ .setIsPatchPoint(IsPatchPoint);
+}
+
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
+ const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CS.getArgument(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+ const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
+ } else
+ Ops.push_back(OpVal);
+ }
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+ // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Chain, InFlag, Callee, NullPtr;
+ SmallVector<SDValue, 32> Ops;
+
+ SDLoc DL = getCurSDLoc();
+ Callee = getValue(CI.getCalledValue());
+ NullPtr = DAG.getIntPtrConstant(0, DL, true);
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // chain, flag = CALLSEQ_START(chain, 0)
+ // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+ // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+ //
+ Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+ InFlag = Chain.getValue(1);
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
+ MVT::i32));
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(&CI, 2, DL, Ops, *this);
+
+ // We are not pushing any register mask info here on the operands list,
+ // because the stackmap doesn't clobber anything.
+
+ // Push the chain and the glue flag.
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+
+ // Create the STACKMAP node.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+ Chain = SDValue(SM, 0);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+
+ // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
+ // Set the root to the target-lowered call chain.
+ DAG.setRoot(Chain);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
+}
+
+/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+ const BasicBlock *EHPadBB) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+
+ CallingConv::ID CC = CS.getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !CS->getType()->isVoidTy();
+ SDLoc dl = getCurSDLoc();
+ SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+
+ // Handle immediate and symbolic callees.
+ if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
+ Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
+ /*isTarget=*/true);
+ else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
+ SDLoc(SymbolicCallee),
+ SymbolicCallee->getValueType(0));
+
+ // Get the real number of arguments participating in the call <numArgs>
+ SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
+ unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
+
+ // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
+ // Intrinsics include all meta-operands up to but not including CC.
+ unsigned NumMetaOpers = PatchPointOpers::CCPos;
+ assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
+ "Not enough arguments provided to the patchpoint intrinsic");
+
+ // For AnyRegCC the arguments are lowered later on manually.
+ unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
+ Type *ReturnTy =
+ IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
+ true);
+ std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
+
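+ // The lowered call ends in a CALLSEQ_END node; when the call produces a
+ // value, a CopyFromReg may sit on top of it, so peel that off first.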
+ SDNode *CallEnd = Result.second.getNode();
+ if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
+ CallEnd = CallEnd->getOperand(0).getNode();
+
+ // Get a call instruction from the call sequence chain. Tail calls are not
+ // allowed.
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+ "Expected a callseq node.");
+ SDNode *Call = CallEnd->getOperand(0).getNode();
+ bool HasGlue = Call->getGluedNode();
+
+ // Replace the target specific call node with the patchable intrinsic.
+ SmallVector<SDValue, 8> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
+ SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
+ MVT::i32));
+
+ // Add the callee.
+ Ops.push_back(Callee);
+
+ // Adjust <numArgs> to account for any arguments that have been passed on the
+ // stack instead.
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
+ NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
+
+ // Add the calling convention
+ Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
+
+ // Add the arguments we omitted previously. The register allocator should
+ // place these in any free register.
+ if (IsAnyRegCC)
+ for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
+ Ops.push_back(getValue(CS.getArgument(i)));
+
+ // Push the arguments from the call instruction up to the register mask.
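+ // Operand 0 of the call is the chain and operand 1 is the callee; both are
+ // added to Ops separately, so copying starts at op_begin() + 2 and stops
+ // before the register mask (and glue, if present), which are appended below.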
+ SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
+ Ops.append(Call->op_begin() + 2, e);
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
+
+ // Push the register mask info.
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end()-2));
+ else
+ Ops.push_back(*(Call->op_end()-1));
+
+ // Push the chain (this is originally the first operand of the call, but
+ // becomes now the last or second to last operand).
+ Ops.push_back(*(Call->op_begin()));
+
+ // Push the glue flag (last operand).
+ if (HasGlue)
+ Ops.push_back(*(Call->op_end()-1));
+
+ SDVTList NodeTys;
+ if (IsAnyRegCC && HasDef) {
+ // Create the return types based on the intrinsic definition
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 3> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
+ assert(ValueVTs.size() == 1 && "Expected only one return value type.");
+
+ // There is always a chain and a glue type at the end
+ ValueVTs.push_back(MVT::Other);
+ ValueVTs.push_back(MVT::Glue);
+ NodeTys = DAG.getVTList(ValueVTs);
+ } else
+ NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ // Replace the target specific call node with a PATCHPOINT node.
+ MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
+ dl, NodeTys, Ops);
+
+ // Update the NodeMap.
+ if (HasDef) {
+ if (IsAnyRegCC)
+ setValue(CS.getInstruction(), SDValue(MN, 0));
+ else
+ setValue(CS.getInstruction(), Result.first);
+ }
+
+ // Fixup the consumers of the intrinsic. The chain and glue may be used in the
+ // call sequence. Furthermore the location of the chain and glue can change
+ // when the AnyReg calling convention is used and the intrinsic returns a
+ // value.
+ if (IsAnyRegCC && HasDef) {
+ SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
+ SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ } else
+ DAG.ReplaceAllUsesWith(Call, MN);
+ DAG.DeleteNode(Call);
+
+ // Inform the Frame Information that we have a patchpoint in this function.
+ FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+}
+
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+ Attrs);
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ Type *OrigRetTy = CLI.RetTy;
+ SmallVector<EVT, 4> RetTys;
+ SmallVector<uint64_t, 4> Offsets;
+ auto &DL = CLI.DAG.getDataLayout();
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
+
+ bool CanLowerReturn =
+ this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
+ CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
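+ // If the target cannot return this value directly, demote the return to an
+ // sret-style hidden pointer argument backed by a stack slot; the result is
+ // loaded back out of that slot after the call.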
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+ if (!CanLowerReturn) {
+ // FIXME: equivalent assert?
+ // assert(!CS.hasInAllocaArgument() &&
+ // "sret demotion is incompatible with inalloca");
+ uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
+ unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
+
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
+ ArgListEntry Entry;
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.isReturned = false;
+ Entry.isSwiftSelf = false;
+ Entry.isSwiftError = false;
+ Entry.Alignment = Align;
+ CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+
+ // sret demotion isn't compatible with tail-calls, since the sret argument
+ // points into the callers stack frame.
+ CLI.IsTailCall = false;
+ } else {
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // We push in swifterror return as the last element of CLI.Ins.
+ ArgListTy &Args = CLI.getArgs();
+ if (supportSwiftError()) {
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ if (Args[i].isSwiftError) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = getPointerTy(DL);
+ MyFlags.ArgVT = EVT(getPointerTy(DL));
+ MyFlags.Flags.setSwiftError();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+ }
+
+ // Handle all of the outgoing arguments.
+ CLI.Outs.clear();
+ CLI.OutVals.clear();
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+ Type *FinalType = Args[i].Ty;
+ if (Args[i].isByVal)
+ FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+ SDValue Op = SDValue(Args[i].Node.getNode(),
+ Args[i].Node.getResNo() + Value);
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+ if (Args[i].isZExt)
+ Flags.setZExt();
+ if (Args[i].isSExt)
+ Flags.setSExt();
+ if (Args[i].isInReg)
+ Flags.setInReg();
+ if (Args[i].isSRet)
+ Flags.setSRet();
+ if (Args[i].isSwiftSelf)
+ Flags.setSwiftSelf();
+ if (Args[i].isSwiftError)
+ Flags.setSwiftError();
+ if (Args[i].isByVal)
+ Flags.setByVal();
+ if (Args[i].isInAlloca) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (Args[i].isByVal || Args[i].isInAlloca) {
+ PointerType *Ty = cast<PointerType>(Args[i].Ty);
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+ // For ByVal, the alignment should come from the frontend. The backend will
+ // guess if this info is not there, but there are cases it cannot get right.
+ unsigned FrameAlign;
+ if (Args[i].Alignment)
+ FrameAlign = Args[i].Alignment;
+ else
+ FrameAlign = getByValTypeAlignment(ElementTy, DL);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ Flags.setOrigAlign(OriginalAlignment);
+
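+      // Determine how this value is split into target registers; for example,
+      // an i64 argument on a typical 32-bit target becomes two i32 parts.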
+ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (Args[i].isSExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (Args[i].isZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // Conservatively only handle 'returned' on non-vectors for now
+ if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
+ "unexpected use of 'returned'");
+ // Before passing 'returned' to the target lowering code, ensure that
+ // either the register MVT and the actual EVT are the same size or that
+ // the return value and argument are extended in the same way; in these
+ // cases it's safe to pass the argument register value unchanged as the
+ // return register value (although it's at the target's option whether
+ // to do so)
+ // TODO: allow code generation to take advantage of partially preserved
+ // registers rather than clobbering the entire register when the
+ // parameter extension method is not compatible with the return
+ // extension method
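+        // For example, an i32 argument returned through a single i32 register
+        // trivially satisfies the size check below, whereas an i1 argument
+        // that is zero-extended for the call but sign-extended for the return
+        // does not, so 'returned' is not propagated in that case.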
+ if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+ (ExtendKind != ISD::ANY_EXTEND &&
+ CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
+ Flags.setReturned();
+ }
+
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
+ CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+
+ for (unsigned j = 0; j != NumParts; ++j) {
+        // If it isn't the first piece, the alignment must be 1.
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
+ i < CLI.NumFixedArgs,
+ i, j*Parts[j].getValueType().getStoreSize());
+ if (NumParts > 1 && j == 0)
+ MyFlags.Flags.setSplit();
+ else if (j != 0) {
+ MyFlags.Flags.setOrigAlign(1);
+ if (j == NumParts - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
+
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
+ }
+
+ if (NeedsRegBlock && Value == NumValues - 1)
+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
+ }
+ }
+
+ SmallVector<SDValue, 4> InVals;
+ CLI.Chain = LowerCall(CLI, InVals);
+
+  // Update CLI.InVals for use outside of this function.
+ CLI.InVals = InVals;
+
+ // Verify that the target's LowerCall behaved as expected.
+ assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
+ "LowerCall didn't return a valid chain!");
+ assert((!CLI.IsTailCall || InVals.empty()) &&
+ "LowerCall emitted a return value for a tail call!");
+ assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
+ "LowerCall didn't emit the correct number of values!");
+
+ // For a tail call, the return value is merely live-out and there aren't
+ // any nodes in the DAG representing it. Return a special value to
+ // indicate that a tail call has been emitted and no more Instructions
+ // should be processed in the current block.
+ if (CLI.IsTailCall) {
+ CLI.DAG.setRoot(CLI.Chain);
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() && "LowerCall emitted a null value!");
+ assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerCall emitted a value with the wrong type!");
+ }
+#endif
+
+ SmallVector<SDValue, 4> ReturnValues;
+ if (!CanLowerReturn) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+
+ ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ unsigned NumValues = RetTys.size();
+ ReturnValues.resize(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+ CLI.DAG.getConstant(Offsets[i], CLI.DL,
+ PtrVT), &Flags);
+ SDValue L = CLI.DAG.getLoad(
+ RetTys[i], CLI.DL, CLI.Chain, Add,
+ MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
+ DemoteStackIdx, Offsets[i]),
+ /* Alignment = */ 1);
+ ReturnValues[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+ } else {
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ Optional<ISD::NodeType> AssertOp;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, nullptr,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+    // For a function returning void, there is no return value. We can't create
+    // such a node, so we just return a null return value; nothing will
+    // actually look at it.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+ }
+
+ SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+ CLI.DAG.getVTList(RetTys), ReturnValues);
+ return std::make_pair(Res, CLI.Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
+ Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ llvm_unreachable("LowerOperation not implemented for this target!");
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+ SDValue Op = getNonRegisterValue(V);
+ assert((Op.getOpcode() != ISD::CopyFromReg ||
+ cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+ "Copy from a reg to the same reg!");
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType());
+ SDValue Chain = DAG.getEntryNode();
+
+ ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
+ FuncInfo.PreferredExtendType.end())
+ ? ISD::ANY_EXTEND
+ : FuncInfo.PreferredExtendType[V];
+ RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
+ PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. Uses in a switch do not qualify, since the
+/// switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
+ // With FastISel active, we may be splitting blocks, so force creation
+ // of virtual registers for all non-dead arguments.
+ if (FastISel)
+ return A->use_empty();
+
+ const BasicBlock &Entry = A->getParent()->front();
+ for (const User *U : A->users())
+ if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
+ return false; // Use not in entry block.
+
+ return true;
+}
+
+void SelectionDAGISel::LowerArguments(const Function &F) {
+ SelectionDAG &DAG = SDB->DAG;
+ SDLoc dl = SDB->getCurSDLoc();
+ const DataLayout &DL = DAG.getDataLayout();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ if (!FuncInfo->CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
+ ISD::InputArg::NoArgIndex, 0);
+ Ins.push_back(RetArg);
+ }
+
+ // Set up the incoming argument description vector.
+ unsigned Idx = 1;
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I, ++Idx) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+ bool isArgValueUsed = !I->use_empty();
+ unsigned PartBase = 0;
+ Type *FinalType = I->getType();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ FinalType = cast<PointerType>(FinalType)->getElementType();
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+ FinalType, F.getCallingConv(), F.isVarArg());
+ for (unsigned Value = 0, NumValues = ValueVTs.size();
+ Value != NumValues; ++Value) {
+ EVT VT = ValueVTs[Value];
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ Flags.setZExt();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ Flags.setSExt();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ Flags.setInReg();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
+ Flags.setSRet();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError))
+ Flags.setSwiftError();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ Flags.setByVal();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+ Flags.setInAlloca();
+ // Set the byval flag for CCAssignFn callbacks that don't know about
+ // inalloca. This way we can know how many bytes we should've allocated
+ // and how many bytes a callee cleanup function will pop. If we port
+ // inalloca to more targets, we'll have to add custom inalloca handling
+ // in the various CC lowering callbacks.
+ Flags.setByVal();
+ }
+ if (F.getCallingConv() == CallingConv::X86_INTR) {
+        // The x86 interrupt calling convention passes the frame (first
+        // parameter) by value on the stack.
+ if (Idx == 1)
+ Flags.setByVal();
+ }
+ if (Flags.isByVal() || Flags.isInAlloca()) {
+ PointerType *Ty = cast<PointerType>(I->getType());
+ Type *ElementTy = Ty->getElementType();
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+ // For ByVal, alignment should be passed from FE. BE will guess if
+ // this info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
+ if (F.getParamAlignment(Idx))
+ FrameAlign = F.getParamAlignment(Idx);
+ else
+ FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
+ Flags.setByValAlign(FrameAlign);
+ }
+ if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+ Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
+ Idx-1, PartBase+i*RegisterVT.getStoreSize());
+ if (NumRegs > 1 && i == 0)
+ MyFlags.Flags.setSplit();
+        // If it isn't the first piece, the alignment must be 1.
+ else if (i > 0) {
+ MyFlags.Flags.setOrigAlign(1);
+ if (i == NumRegs - 1)
+ MyFlags.Flags.setSplitEnd();
+ }
+ Ins.push_back(MyFlags);
+ }
+ if (NeedsRegBlock && Value == NumValues - 1)
+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
+ PartBase += VT.getStoreSize();
+ }
+ }
+
+ // Call the target to set up the argument values.
+ SmallVector<SDValue, 8> InVals;
+ SDValue NewRoot = TLI->LowerFormalArguments(
+ DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
+
+ // Verify that the target's LowerFormalArguments behaved as expected.
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+ "LowerFormalArguments didn't return a valid chain!");
+ assert(InVals.size() == Ins.size() &&
+ "LowerFormalArguments didn't emit the correct number of values!");
+ DEBUG({
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert(InVals[i].getNode() &&
+ "LowerFormalArguments emitted a null value!");
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+ "LowerFormalArguments emitted a value with the wrong type!");
+ }
+ });
+
+ // Update the DAG with the new chain value resulting from argument lowering.
+ DAG.setRoot(NewRoot);
+
+ // Set up the argument values.
+ unsigned i = 0;
+ Idx = 1;
+ if (!FuncInfo->CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(),
+ PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ MVT VT = ValueVTs[0].getSimpleVT();
+ MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+ Optional<ISD::NodeType> AssertOp = None;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+ RegVT, VT, nullptr, AssertOp);
+
+ MachineFunction& MF = SDB->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
+ FuncInfo->DemoteRegister = SRetReg;
+ NewRoot =
+ SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
+
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+ ++I, ++Idx) {
+ SmallVector<SDValue, 4> ArgValues;
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+
+    // If this argument is unused, remember its value. It is used to generate
+    // debugging information.
+ if (I->use_empty() && NumValues) {
+ SDB->setUnusedArgValue(&*I, InVals[i]);
+
+ // Also remember any frame index for use in FastISel.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ }
+
+ for (unsigned Val = 0; Val != NumValues; ++Val) {
+ EVT VT = ValueVTs[Val];
+ MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+ unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+
+ if (!I->use_empty()) {
+ Optional<ISD::NodeType> AssertOp;
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
+ AssertOp = ISD::AssertSext;
+ else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
+ AssertOp = ISD::AssertZext;
+
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+ NumParts, PartVT, VT,
+ nullptr, AssertOp));
+ }
+
+ i += NumParts;
+ }
+
+ // We don't need to do anything else for unused arguments.
+ if (ArgValues.empty())
+ continue;
+
+ // Note down frame index.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+
+ SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
+ SDB->getCurSDLoc());
+
+ SDB->setValue(&*I, Res);
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (LoadSDNode *LNode =
+ dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ }
+
+ // Update SwiftErrorMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() &&
+ F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) {
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg;
+ }
+
+ // If this argument is live outside of the entry block, insert a copy from
+ // wherever we got it to the vreg that other BB's will reference it as.
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ // If we can, though, try to skip creating an unnecessary vreg.
+ // FIXME: This isn't very clean... it would be nice to make this more
+ // general. It's also subtly incompatible with the hacks FastISel
+ // uses with vregs.
+ unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ FuncInfo->ValueMap[&*I] = Reg;
+ continue;
+ }
+ }
+ if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&*I);
+ SDB->CopyToExportRegsIfNeeded(&*I);
+ }
+ }
+
+ assert(i == InVals.size() && "Argument register count mismatch!");
+
+ // Finally, if the target has anything special to do, allow it to do so.
+ EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TerminatorInst *TI = LLVMBB->getTerminator();
+
+ SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+ // Check PHI nodes in successors that expect a value to be available from this
+ // block.
+ for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+ const BasicBlock *SuccBB = TI->getSuccessor(succ);
+ if (!isa<PHINode>(SuccBB->begin())) continue;
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+ // If this terminator has multiple identical successors (common for
+ // switches), only handle each succ once.
+ if (!SuccsHandled.insert(SuccMBB).second)
+ continue;
+
+ MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+ // At this point we know that there is a 1-1 correspondence between LLVM PHI
+ // nodes and Machine PHI nodes, but the incoming operands have not been
+ // emitted yet.
+ for (BasicBlock::const_iterator I = SuccBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      // Ignore dead PHIs.
+ if (PN->use_empty()) continue;
+
+ // Skip empty types
+ if (PN->getType()->isEmptyTy())
+ continue;
+
+ unsigned Reg;
+ const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+ if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+ unsigned &RegOut = ConstantsOut[C];
+ if (RegOut == 0) {
+ RegOut = FuncInfo.CreateRegs(C->getType());
+ CopyValueToVirtualRegister(C, RegOut);
+ }
+ Reg = RegOut;
+ } else {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
+ assert(isa<AllocaInst>(PHIOp) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+ "Didn't codegen value into a register!??");
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ CopyValueToVirtualRegister(PHIOp, Reg);
+ }
+ }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+ SmallVector<EVT, 4> ValueVTs;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
+ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+ EVT VT = ValueVTs[vti];
+ unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+ for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+ FuncInfo.PHINodesToUpdate.push_back(
+ std::make_pair(&*MBBI++, Reg + i));
+ Reg += NumRegisters;
+ }
+ }
+ }
+
+ ConstantsOut.clear();
+}
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB
+/// if SuccMBB is null.
+MachineBasicBlock *
+SelectionDAGBuilder::StackProtectorDescriptor::
+AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator I(MBB);
+ if (++I == FuncInfo.MF->end())
+ return nullptr;
+ return &*I;
+}
+
+/// During lowering new call nodes can be created (such as memset, etc.).
+/// Those will become new roots of the current DAG, but complications arise
+/// when they are tail calls. In such cases, the call lowering will update
+/// the root, but the builder still needs to know that a tail call has been
+/// lowered in order to avoid generating an additional return.
+void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
+  // If the node is null, a tail call was emitted and the DAG root has already
+  // been updated by the call lowering.
+ if (MaybeTC.getNode() != nullptr)
+ DAG.setRoot(MaybeTC);
+ else
+ HasTailCall = true;
+}
+
+bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
+ unsigned *TotalCases, unsigned First,
+ unsigned Last,
+ unsigned Density) {
+ assert(Last >= First);
+ assert(TotalCases[Last] >= TotalCases[First]);
+
+ APInt LowCase = Clusters[First].Low->getValue();
+ APInt HighCase = Clusters[Last].High->getValue();
+ assert(LowCase.getBitWidth() == HighCase.getBitWidth());
+
+ // FIXME: A range of consecutive cases has 100% density, but only requires one
+ // comparison to lower. We should discriminate against such consecutive ranges
+ // in jump tables.
+
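+  // The difference is clamped so that the Range * Density product below stays
+  // within uint64_t for reasonable density percentages.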
+ uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
+ uint64_t Range = Diff + 1;
+
+ uint64_t NumCases =
+ TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
+
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
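+  // E.g. with Density = 40, four cases spread over a value range of 9 satisfy
+  // 4 * 100 >= 9 * 40 and are considered dense enough.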
+ return NumCases * 100 >= Range * Density;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI,
+ const SwitchInst *SI) {
+ const Function *Fn = SI->getParent()->getParent();
+ if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
+ return false;
+
+ return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+ TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
+}
+
+bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB,
+ CaseCluster &JTCluster) {
+ assert(First <= Last);
+
+ auto Prob = BranchProbability::getZero();
+ unsigned NumCmps = 0;
+ std::vector<MachineBasicBlock*> Table;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
+ for (unsigned I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Prob += Clusters[I].Prob;
+ APInt Low = Clusters[I].Low->getValue();
+ APInt High = Clusters[I].High->getValue();
+ NumCmps += (Low == High) ? 1 : 2;
+ if (I != First) {
+ // Fill the gap between this and the previous cluster.
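+      // E.g. a previous cluster ending at 2 followed by one starting at 5
+      // leaves a gap of two entries (3 and 4) that are routed to DefaultMBB.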
+ APInt PreviousHigh = Clusters[I - 1].High->getValue();
+ assert(PreviousHigh.slt(Low));
+ uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
+ for (uint64_t J = 0; J < Gap; J++)
+ Table.push_back(DefaultMBB);
+ }
+ uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
+ for (uint64_t J = 0; J < ClusterSize; ++J)
+ Table.push_back(Clusters[I].MBB);
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
+ }
+
+ unsigned NumDests = JTProbs.size();
+ if (isSuitableForBitTests(NumDests, NumCmps,
+ Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue())) {
+ // Clusters[First..Last] should be lowered as bit tests instead.
+ return false;
+ }
+
+ // Create the MBB that will load from and jump through the table.
+ // Note: We create it here, but it's not inserted into the function yet.
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *JumpTableMBB =
+ CurMF->CreateMachineBasicBlock(SI->getParent());
+
+ // Add successors. Note: use table order for determinism.
+ SmallPtrSet<MachineBasicBlock *, 8> Done;
+ for (MachineBasicBlock *Succ : Table) {
+ if (Done.count(Succ))
+ continue;
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
+ Done.insert(Succ);
+ }
+ JumpTableMBB->normalizeSuccProbs();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
+ ->createJumpTableIndex(Table);
+
+ // Set up the jump table info.
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
+ JumpTableHeader JTH(Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), SI->getCondition(),
+ nullptr, false);
+ JTCases.emplace_back(std::move(JTH), std::move(JT));
+
+ JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
+ JTCases.size() - 1, Prob);
+ return true;
+}
+
+void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB) {
+#ifndef NDEBUG
+ // Clusters must be non-empty, sorted, and only contain Range clusters.
+ assert(!Clusters.empty());
+ for (CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range);
+ for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
+ assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!areJTsAllowed(TLI, SI))
+ return;
+
+ const int64_t N = Clusters.size();
+ const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
+
+ // TotalCases[i]: Total nbr of cases in Clusters[0..i].
+ SmallVector<unsigned, 8> TotalCases(N);
+
+ for (unsigned i = 0; i < N; ++i) {
+ APInt Hi = Clusters[i].High->getValue();
+ APInt Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
+
+ unsigned MinDensity = JumpTableDensity;
+ if (DefaultMBB->getParent()->getFunction()->optForSize())
+ MinDensity = OptsizeJumpTableDensity;
+ if (N >= MinJumpTableSize
+ && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) {
+ // Cheap case: the whole range might be suitable for jump table.
+ CaseCluster JTCluster;
+ if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ Clusters[0] = JTCluster;
+ Clusters.resize(1);
+ return;
+ }
+ }
+
+ // The algorithm below is not suitable for -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // Split Clusters into minimum number of dense partitions. The algorithm uses
+ // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
+ // for the Case Statement'" (1994), but builds the MinPartitions array in
+ // reverse order to make it easier to reconstruct the partitions in ascending
+ // order. In the choice between two optimal partitionings, it picks the one
+ // which yields more jump tables.
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+ // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
+ SmallVector<unsigned, 8> NumTables(N);
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+ assert(MinJumpTableSize > 1);
+ NumTables[N - 1] = 0;
+
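+  // The recurrence below treats every dense range Clusters[i..j] as a
+  // candidate first partition and combines it with the best solution already
+  // computed for Clusters[j+1..N-1].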
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; i--) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+ NumTables[i] = NumTables[i + 1];
+
+ // Search for a solution that results in fewer partitions.
+ for (int64_t j = N - 1; j > i; j--) {
+ // Try building a partition from Clusters[i..j].
+ if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) {
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ bool IsTable = j - i + 1 >= MinJumpTableSize;
+ unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
+
+ // If this j leads to fewer partitions, or same number of partitions
+ // with more lookup tables, it is a better partitioning.
+ if (NumPartitions < MinPartitions[i] ||
+ (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ NumTables[i] = Tables;
+ }
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing some with jump tables in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(Last >= First);
+ assert(DstIndex <= First);
+ unsigned NumClusters = Last - First + 1;
+
+ CaseCluster JTCluster;
+ if (NumClusters >= MinJumpTableSize &&
+ buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+ Clusters[DstIndex++] = JTCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
+ // FIXME: Using the pointer type doesn't seem ideal.
+ uint64_t BW = DAG.getDataLayout().getPointerSizeInBits();
+ uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
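+  // E.g. with 64-bit pointers, case values 10 and 73 give Range == 64, which
+  // still fits in a machine word.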
+ return Range <= BW;
+}
+
+bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
+ unsigned NumCmps,
+ const APInt &Low,
+ const APInt &High) {
+ // FIXME: I don't think NumCmps is the correct metric: a single case and a
+ // range of cases both require only one branch to lower. Just looking at the
+ // number of clusters and destinations should be enough to decide whether to
+ // build bit tests.
+
+ // To lower a range with bit tests, the range must fit the bitwidth of a
+ // machine word.
+ if (!rangeFitsInWord(Low, High))
+ return false;
+
+ // Decide whether it's profitable to lower this range with bit tests. Each
+ // destination requires a bit test and branch, and there is an overall range
+ // check branch. For a small number of clusters, separate comparisons might be
+ // cheaper, and for many destinations, splitting the range might be better.
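+  // For example, three distinct case values that all branch to the same block
+  // (NumDests == 1, NumCmps == 3) are worth a single bit test, provided the
+  // values span no more than a machine word.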
+ return (NumDests == 1 && NumCmps >= 3) ||
+ (NumDests == 2 && NumCmps >= 5) ||
+ (NumDests == 3 && NumCmps >= 6);
+}
+
+bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ CaseCluster &BTCluster) {
+ assert(First <= Last);
+ if (First == Last)
+ return false;
+
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ unsigned NumCmps = 0;
+ for (int64_t I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Dests.set(Clusters[I].MBB->getNumber());
+ NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ }
+ unsigned NumDests = Dests.count();
+
+ APInt Low = Clusters[First].Low->getValue();
+ APInt High = Clusters[Last].High->getValue();
+ assert(Low.slt(High));
+
+ if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
+ return false;
+
+ APInt LowBound;
+ APInt CmpRange;
+
+ const int BitWidth = DAG.getTargetLoweringInfo()
+ .getPointerTy(DAG.getDataLayout())
+ .getSizeInBits();
+ assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!");
+
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+    // Optimize the case where all the case values fit in a word without
+    // having to subtract minValue; the subtraction can be omitted entirely.
+ LowBound = APInt::getNullValue(Low.getBitWidth());
+ CmpRange = High;
+ ContiguousRange = false;
+ } else {
+ LowBound = Low;
+ CmpRange = High - Low;
+ }
+
+ CaseBitsVector CBV;
+ auto TotalProb = BranchProbability::getZero();
+ for (unsigned i = First; i <= Last; ++i) {
+ // Find the CaseBits for this destination.
+ unsigned j;
+ for (j = 0; j < CBV.size(); ++j)
+ if (CBV[j].BB == Clusters[i].MBB)
+ break;
+ if (j == CBV.size())
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
+ CaseBits *CB = &CBV[j];
+
+ // Update Mask, Bits and ExtraProb.
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
+ uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
+ assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
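+    // Build a mask with bits [Lo, Hi] set; e.g. Lo == 2 and Hi == 4 produce
+    // 0b11100.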
+ CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
+ CB->Bits += Hi - Lo + 1;
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
+ }
+
+ BitTestInfo BTI;
+ std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
+ // Sort by probability first, number of bits second.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
+ return a.Bits > b.Bits;
+ });
+
+ for (auto &CB : CBV) {
+ MachineBasicBlock *BitTestBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
+ }
+ BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
+
+ BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
+ BitTestCases.size() - 1, TotalProb);
+ return true;
+}
+
+void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
+ const SwitchInst *SI) {
+// Partition Clusters into as few subsets as possible, where each subset has a
+// range that fits in a machine word and has <= 3 unique destinations.
+
+#ifndef NDEBUG
+ // Clusters must be sorted and contain Range or JumpTable clusters.
+ assert(!Clusters.empty());
+ assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
+ for (const CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
+ for (unsigned i = 1; i < Clusters.size(); ++i)
+ assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ // The algorithm below is not suitable for -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // If target does not have legal shift left, do not emit bit tests at all.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
+ if (!TLI.isOperationLegal(ISD::SHL, PTy))
+ return;
+
+ int BitWidth = PTy.getSizeInBits();
+ const int64_t N = Clusters.size();
+
+ // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+
+ // FIXME: This might not be the best algorithm for finding bit test clusters.
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; --i) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+
+ // Search for a solution that results in fewer partitions.
+ // Note: the search is limited by BitWidth, reducing time complexity.
+ for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
+ // Try building a partition from Clusters[i..j].
+
+ // Check the range.
+ if (!rangeFitsInWord(Clusters[i].Low->getValue(),
+ Clusters[j].High->getValue()))
+ continue;
+
+ // Check nbr of destinations and cluster types.
+ // FIXME: This works, but doesn't seem very efficient.
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ size_t NumClusters = Last - First + 1;
+ std::memmove(&Clusters[DstIndex], &Clusters[First],
+ sizeof(Clusters[0]) * NumClusters);
+ DstIndex += NumClusters;
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB) {
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI(W.MBB);
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = &*BBI;
+
+ unsigned Size = W.LastCluster - W.FirstCluster + 1;
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+
+ if (Size == 2 && W.MBB == SwitchMBB) {
+    // If two of the cases have the same destination and their values differ
+    // in exactly one bit, use bit manipulation to do both compares at once.
+    // For example:
+    // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3
+ // cases.
+ // TODO: Handle cases where W.CaseBB != SwitchBB.
+ CaseCluster &Small = *W.FirstCluster;
+ CaseCluster &Big = *W.LastCluster;
+
+ if (Small.Low == Small.High && Big.Low == Big.High &&
+ Small.MBB == Big.MBB) {
+ const APInt &SmallValue = Small.Low->getValue();
+ const APInt &BigValue = Big.Low->getValue();
+
+ // Check that there is only one bit different.
+ APInt CommonBit = BigValue ^ SmallValue;
+ if (CommonBit.isPowerOf2()) {
+ SDValue CondLHS = getValue(Cond);
+ EVT VT = CondLHS.getValueType();
+ SDLoc DL = getCurSDLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, DL, VT));
+ SDValue Cond = DAG.getSetCC(
+ DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ // Both Small and Big will jump to Small.BB, so we sum up the
+ // probabilities.
+ addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+ if (BPI)
+ addSuccessorWithProb(
+ SwitchMBB, DefaultMBB,
+ // The default destination is the first successor in IR.
+ BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+ else
+ addSuccessorWithProb(SwitchMBB, DefaultMBB);
+
+ // Insert the true branch.
+ SDValue BrCond =
+ DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.MBB));
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(DefaultMBB));
+
+ DAG.setRoot(BrCond);
+ return;
+ }
+ }
+ }
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Order cases by probability so the most likely case will be checked first.
+ std::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Prob > b.Prob;
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+    // without changing the order of probabilities.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+ --I;
+ if (I->Prob > W.LastCluster->Prob)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+ UnhandledProbs -= I->Prob;
+
+ switch (I->Kind) {
+ case CC_JumpTable: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
+ JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ break;
+ }
+ }
+
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
+
+ // The jump table header will be inserted in our current block, do the
+ // range check, and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+ JTH->Emitted = true;
+ }
+ break;
+ }
+ case CC_BitTests: {
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+      // If the cases in the bit test don't form a contiguous range, we evenly
+      // distribute the probability on the edge to Fallthrough to the two
+      // successors of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ visitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ break;
+ }
+ case CC_Range: {
+ const Value *RHS, *LHS, *MHS;
+ ISD::CondCode CC;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ CC = ISD::SETEQ;
+ LHS = Cond;
+        RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ CC = ISD::SETLE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
+ UnhandledProbs);
+
+ if (CurMBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SwitchCases.push_back(CB);
+
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
+}
+
+unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
+ CaseClusterIt First,
+ CaseClusterIt Last) {
+ return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
+ if (X.Prob != CC.Prob)
+ return X.Prob > CC.Prob;
+
+ // Ties are broken by comparing the case value.
+ return X.Low->getValue().slt(CC.Low->getValue());
+ });
+}
+
+void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
+ const SwitchWorkListItem &W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB) {
+ assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
+ "Clusters not sorted?");
+
+ assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
+
+ // Balance the tree based on branch probabilities to create a near-optimal (in
+ // terms of search time given key frequency) binary search tree. See e.g. Kurt
+ // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
+ CaseClusterIt LastLeft = W.FirstCluster;
+ CaseClusterIt FirstRight = W.LastCluster;
+ auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
+ auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
+
+ // Move LastLeft and FirstRight towards each other from opposite directions to
+ // find a partitioning of the clusters which balances the probability on both
+ // sides. If LeftProb and RightProb are equal, alternate which side is
+ // taken to ensure 0-probability nodes are distributed evenly.
+ unsigned I = 0;
+ while (LastLeft + 1 < FirstRight) {
+ if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
+ LeftProb += (++LastLeft)->Prob;
+ else
+ RightProb += (--FirstRight)->Prob;
+ I++;
+ }
+
+ for (;;) {
+ // Our binary search tree differs from a typical BST in that ours can have up
+ // to three values in each leaf. The pivot selection above doesn't take that
+ // into account, which means the tree might require more nodes and be less
+ // efficient. We compensate for this here.
+
+ unsigned NumLeft = LastLeft - W.FirstCluster + 1;
+ unsigned NumRight = W.LastCluster - FirstRight + 1;
+
+ if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
+      // If one side has fewer than 3 clusters and the other has more than 3,
+      // consider taking a cluster from the larger side.
+
+ if (NumLeft < NumRight) {
+ // Consider moving the first cluster on the right to the left side.
+ CaseCluster &CC = *FirstRight;
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ if (LeftSideRank <= RightSideRank) {
+ // Moving the cluster to the left does not demote it.
+ ++LastLeft;
+ ++FirstRight;
+ continue;
+ }
+ } else {
+ assert(NumRight < NumLeft);
+ // Consider moving the last element on the left to the right side.
+ CaseCluster &CC = *LastLeft;
+ unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+ unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+ if (RightSideRank <= LeftSideRank) {
+          // Moving the cluster to the right does not demote it.
+ --LastLeft;
+ --FirstRight;
+ continue;
+ }
+ }
+ }
+ break;
+ }
+
+ assert(LastLeft + 1 == FirstRight);
+ assert(LastLeft >= W.FirstCluster);
+ assert(FirstRight <= W.LastCluster);
+
+ // Use the first element on the right as pivot since we will make less-than
+ // comparisons against it.
+ CaseClusterIt PivotCluster = FirstRight;
+ assert(PivotCluster > W.FirstCluster);
+ assert(PivotCluster <= W.LastCluster);
+
+ CaseClusterIt FirstLeft = W.FirstCluster;
+ CaseClusterIt LastRight = W.LastCluster;
+
+ const ConstantInt *Pivot = PivotCluster->Low;
+
+ // New blocks will be inserted immediately after the current one.
+ MachineFunction::iterator BBI(W.MBB);
+ ++BBI;
+
+ // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+ // we can branch to its destination directly if it's squeezed exactly in
+ // between the known lower bound and Pivot - 1.
+ MachineBasicBlock *LeftMBB;
+ if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+ FirstLeft->Low == W.GE &&
+ (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+ LeftMBB = FirstLeft->MBB;
+ } else {
+ LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, LeftMBB);
+ WorkList.push_back(
+ {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+ // single cluster, RHS.Low == Pivot, and we can branch to its destination
+ // directly if RHS.High equals the current upper bound.
+ MachineBasicBlock *RightMBB;
+ if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
+ W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+ RightMBB = FirstRight->MBB;
+ } else {
+ RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, RightMBB);
+ WorkList.push_back(
+ {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
+ // Put Cond in a virtual register to make it available from the new blocks.
+ ExportFromCurrentBlock(Cond);
+ }
+
+ // Create the CaseBlock record that will be used to lower the branch.
+ CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+ LeftProb, RightProb);
+
+ if (W.MBB == SwitchMBB)
+ visitSwitchCase(CB, SwitchMBB);
+ else
+ SwitchCases.push_back(CB);
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+ // Extract cases from the switch.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (auto I : SI.cases()) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
+ const ConstantInt *CaseVal = I.getCaseValue();
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+ }
+
+ MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
+ sortAndRangeify(Clusters);
+
+ if (TM.getOptLevel() != CodeGenOpt::None) {
+ // Replace an unreachable default with the most popular destination.
+ // FIXME: Exploit unreachable default more aggressively.
+ bool UnreachableDefault =
+ isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
+ if (UnreachableDefault && !Clusters.empty()) {
+ DenseMap<const BasicBlock *, unsigned> Popularity;
+ unsigned MaxPop = 0;
+ const BasicBlock *MaxBB = nullptr;
+ for (auto I : SI.cases()) {
+ const BasicBlock *BB = I.getCaseSuccessor();
+ if (++Popularity[BB] > MaxPop) {
+ MaxPop = Popularity[BB];
+ MaxBB = BB;
+ }
+ }
+ // Set new default.
+ assert(MaxPop > 0 && MaxBB);
+ DefaultMBB = FuncInfo.MBBMap[MaxBB];
+
+ // Remove cases that were pointing to the destination that is now the
+ // default.
+ CaseClusterVector New;
+ New.reserve(Clusters.size());
+ for (CaseCluster &CC : Clusters) {
+ if (CC.MBB != DefaultMBB)
+ New.push_back(CC);
+ }
+ Clusters = std::move(New);
+ }
+ }
+
+ // If there is only the default destination, jump there directly.
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+ if (Clusters.empty()) {
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != NextBlock(SwitchMBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
+ }
+ return;
+ }
+
+ findJumpTables(Clusters, &SI, DefaultMBB);
+ findBitTestClusters(Clusters, &SI);
+
+ DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable) dbgs() << "JT:";
+ if (C.Kind == CC_BitTests) dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.back();
+ WorkList.pop_back();
+ unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
+
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
+ // For optimized builds, lower large range as a balanced binary tree.
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
+ continue;
+ }
+
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 000000000000..b9888ae87639
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,1019 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+
+#include "StatepointLowering.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+class AddrSpaceCastInst;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class MVT;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class DataLayout;
+class TargetLibraryInfo;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+ /// CurInst - The current instruction being visited
+ const Instruction *CurInst;
+
+ DenseMap<const Value*, SDValue> NodeMap;
+
+  /// UnusedArgNodeMap - Maps argument values for unused arguments. This is used
+ /// to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
+ : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+ /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+ /// them up and then emit token factor nodes when possible. This allows us to
+ /// get simple disambiguation between loads without worrying about alias
+ /// analysis.
+ SmallVector<SDValue, 8> PendingLoads;
+
+ /// State used while lowering a statepoint sequence (gc_statepoint,
+ /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details.
+ StatepointLoweringState StatepointLowering;
+private:
+
+ /// PendingExports - CopyToReg nodes that copy values to virtual registers
+ /// for export to other blocks need to be emitted before any terminator
+ /// instruction, but they have no other ordering requirements. We bunch them
+  /// up and then emit a single token factor for them just before terminator
+ /// instructions.
+ SmallVector<SDValue, 8> PendingExports;
+
+ /// SDNodeOrder - A unique monotonically increasing number used to order the
+ /// SDNodes we create.
+ unsigned SDNodeOrder;
+
+ enum CaseClusterKind {
+ /// A cluster of adjacent case labels with the same destination, or just one
+ /// case.
+ CC_Range,
+ /// A cluster of cases suitable for jump table lowering.
+ CC_JumpTable,
+ /// A cluster of cases suitable for bit test lowering.
+ CC_BitTests
+ };
+
+ /// A cluster of case labels.
+ struct CaseCluster {
+ CaseClusterKind Kind;
+ const ConstantInt *Low, *High;
+ union {
+ MachineBasicBlock *MBB;
+ unsigned JTCasesIndex;
+ unsigned BTCasesIndex;
+ };
+ BranchProbability Prob;
+
+ static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
+ MachineBasicBlock *MBB, BranchProbability Prob) {
+ CaseCluster C;
+ C.Kind = CC_Range;
+ C.Low = Low;
+ C.High = High;
+ C.MBB = MBB;
+ C.Prob = Prob;
+ return C;
+ }
+
+ static CaseCluster jumpTable(const ConstantInt *Low,
+ const ConstantInt *High, unsigned JTCasesIndex,
+ BranchProbability Prob) {
+ CaseCluster C;
+ C.Kind = CC_JumpTable;
+ C.Low = Low;
+ C.High = High;
+ C.JTCasesIndex = JTCasesIndex;
+ C.Prob = Prob;
+ return C;
+ }
+
+ static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
+ unsigned BTCasesIndex, BranchProbability Prob) {
+ CaseCluster C;
+ C.Kind = CC_BitTests;
+ C.Low = Low;
+ C.High = High;
+ C.BTCasesIndex = BTCasesIndex;
+ C.Prob = Prob;
+ return C;
+ }
+ };
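+
+  // A minimal sketch of building a cluster (CI1, CI4 and TargetMBB are
+  // hypothetical): a contiguous run of cases such as 1..4 that all branch to
+  // the same block would typically end up as a single range cluster.
+  //
+  //   CaseCluster C = CaseCluster::range(CI1, CI4, TargetMBB,
+  //                                      BranchProbability::getUnknown());
+  //
+  // Clusters later found profitable for a jump table or for bit tests are
+  // rebuilt via jumpTable()/bitTests(), which record an index into JTCases or
+  // BitTestCases instead of a destination MBB.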
+
+ typedef std::vector<CaseCluster> CaseClusterVector;
+ typedef CaseClusterVector::iterator CaseClusterIt;
+
+ struct CaseBits {
+ uint64_t Mask;
+ MachineBasicBlock* BB;
+ unsigned Bits;
+ BranchProbability ExtraProb;
+
+ CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
+ BranchProbability Prob):
+ Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
+
+ CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
+ };
+
+ typedef std::vector<CaseBits> CaseBitsVector;
+
+ /// Sort Clusters and merge adjacent cases.
+ void sortAndRangeify(CaseClusterVector &Clusters);
+
+ /// CaseBlock - This structure is used to communicate between
+ /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+ /// blocks needed by multi-case switch statements.
+ struct CaseBlock {
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
+ FalseProb(falseprob) {}
+
+ // CC - the condition code to use for the case block's setcc node
+ ISD::CondCode CC;
+
+ // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+ // Emit by default LHS op RHS. MHS is used for range comparisons:
+ // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+ const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
+ // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+ MachineBasicBlock *TrueBB, *FalseBB;
+
+ // ThisBB - the block into which to emit the code for the setcc and branches
+ MachineBasicBlock *ThisBB;
+
+    // TrueProb/FalseProb - branch probabilities.
+ BranchProbability TrueProb, FalseProb;
+ };
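+
+  // A minimal sketch (X, FortyTwo and the MBB names are hypothetical): for a
+  // simple equality case such as "x == 42", CC is ISD::SETEQ, CmpLHS is the
+  // value of x, CmpRHS is the constant 42, and CmpMHS stays null because no
+  // range comparison is involved.
+  //
+  //   CaseBlock CB(ISD::SETEQ, X, FortyTwo, nullptr,
+  //                CaseMBB, NextTestMBB, CurMBB);
+  //   SwitchCases.push_back(CB);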
+
+ struct JumpTable {
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+ /// Reg - the virtual register containing the index of the jump table entry
+    /// to jump to.
+ unsigned Reg;
+ /// JTI - the JumpTableIndex for this jump table in the function.
+ unsigned JTI;
+ /// MBB - the MBB into which to emit the code for the indirect jump.
+ MachineBasicBlock *MBB;
+ /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB. This is used when updating PHI nodes in successors.
+ MachineBasicBlock *Default;
+ };
+ struct JumpTableHeader {
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false)
+ : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+ Emitted(E) {}
+ APInt First;
+ APInt Last;
+ const Value *SValue;
+ MachineBasicBlock *HeaderBB;
+ bool Emitted;
+ };
+ typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+ struct BitTestCase {
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ BranchProbability Prob):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
+ uint64_t Mask;
+ MachineBasicBlock *ThisBB;
+ MachineBasicBlock *TargetBB;
+ BranchProbability ExtraProb;
+ };
+
+ typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+ struct BitTestBlock {
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+ bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+ RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+ Cases(std::move(C)), Prob(Pr) {}
+ APInt First;
+ APInt Range;
+ const Value *SValue;
+ unsigned Reg;
+ MVT RegVT;
+ bool Emitted;
+ bool ContiguousRange;
+ MachineBasicBlock *Parent;
+ MachineBasicBlock *Default;
+ BitTestInfo Cases;
+ BranchProbability Prob;
+ BranchProbability DefaultProb;
+ };
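+
+  // A rough sketch of what these structures describe: when all case values fit
+  // in one machine word, the header block range-checks (x - First) against
+  // Range, where x is the hypothetical switch operand, and each bit test block
+  // then tests a precomputed mask, roughly:
+  //
+  //   if (((uint64_t)1 << (x - First)) & Mask)  // one bit set per case value
+  //     goto TargetBB;
+  //
+  // First and Range live in BitTestBlock; Mask and TargetBB live in each
+  // BitTestCase.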
+
+ /// Check whether a range of clusters is dense enough for a jump table.
+ bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
+ unsigned First, unsigned Last, unsigned MinDensity);
+
+ /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
+ /// decides it's not a good idea.
+ bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
+ unsigned Last, const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
+
+ /// Find clusters of cases suitable for jump table lowering.
+ void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB);
+
+ /// Check whether the range [Low,High] fits in a machine word.
+ bool rangeFitsInWord(const APInt &Low, const APInt &High);
+
+ /// Check whether these clusters are suitable for lowering with bit tests based
+ /// on the number of destinations, comparison metric, and range.
+ bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
+ const APInt &Low, const APInt &High);
+
+ /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
+ /// decides it's not a good idea.
+ bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
+ const SwitchInst *SI, CaseCluster &BTCluster);
+
+ /// Find clusters of cases suitable for bit test lowering.
+ void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
+
+ struct SwitchWorkListItem {
+ MachineBasicBlock *MBB;
+ CaseClusterIt FirstCluster;
+ CaseClusterIt LastCluster;
+ const ConstantInt *GE;
+ const ConstantInt *LT;
+ BranchProbability DefaultProb;
+ };
+ typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+
+ /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
+ /// than each cluster in the range, its rank is 0.
+ static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First,
+ CaseClusterIt Last);
+
+ /// Emit comparison and split W into two subtrees.
+ void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
+ Value *Cond, MachineBasicBlock *SwitchMBB);
+
+ /// Lower W.
+ void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB);
+
+
+ /// A class which encapsulates all of the information needed to generate a
+ /// stack protector check and signals to isel via its state being initialized
+ /// that a stack protector needs to be generated.
+ ///
+ /// *NOTE* The following is a high level documentation of SelectionDAG Stack
+ /// Protector Generation. The reason that it is placed here is for a lack of
+ /// other good places to stick it.
+ ///
+ /// High Level Overview of SelectionDAG Stack Protector Generation:
+ ///
+ /// Previously, generation of stack protectors was done exclusively in the
+ /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
+ /// splitting basic blocks at the IR level to create the success/failure basic
+ /// blocks in the tail of the basic block in question. As a result of this,
+ /// calls that would have qualified for the sibling call optimization were no
+ /// longer eligible for optimization since said calls were no longer right in
+ /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
+ /// instruction).
+ ///
+ /// Then it was noticed that since the sibling call optimization causes the
+ /// callee to reuse the caller's stack, if we could delay the generation of
+ /// the stack protector check until later in CodeGen after the sibling call
+ /// decision was made, we get both the tail call optimization and the stack
+ /// protector check!
+ ///
+ /// A few goals in solving this problem were:
+ ///
+ /// 1. Preserve the architecture independence of stack protector generation.
+ ///
+ /// 2. Preserve the normal IR level stack protector check for platforms like
+ /// OpenBSD for which we support platform-specific stack protector
+ /// generation.
+ ///
+ /// The main problem that guided the present solution is that one can not
+ /// solve this problem in an architecture independent manner at the IR level
+ /// only. This is because:
+ ///
+ /// 1. The decision on whether or not to perform a sibling call on certain
+ /// platforms (for instance i386) requires lower level information
+ /// related to available registers that can not be known at the IR level.
+ ///
+ /// 2. Even if the previous point were not true, the decision on whether to
+ /// perform a tail call is done in LowerCallTo in SelectionDAG which
+ /// occurs after the Stack Protector Pass. As a result, one would need to
+ /// put the relevant callinst into the stack protector check success
+ /// basic block (where the return inst is placed) and then move it back
+ /// later at SelectionDAG/MI time before the stack protector check if the
+ /// tail call optimization failed. The MI level option was nixed
+ /// immediately since it would require platform-specific pattern
+ /// matching. The SelectionDAG level option was nixed because
+ /// SelectionDAG only processes one IR level basic block at a time
+ /// implying one could not create a DAG Combine to move the callinst.
+ ///
+ /// To get around this problem a few things were realized:
+ ///
+ /// 1. While one can not handle multiple IR level basic blocks at the
+ /// SelectionDAG Level, one can generate multiple machine basic blocks
+ /// for one IR level basic block. This is how we handle bit tests and
+ /// switches.
+ ///
+ /// 2. At the MI level, tail calls are represented via a special return
+ /// MIInst called "tcreturn". Thus if we know the basic block in which we
+ /// wish to insert the stack protector check, we get the correct behavior
+ /// by always inserting the stack protector check right before the return
+ /// statement. This is a "magical transformation" since no matter where
+ /// the stack protector check intrinsic is, we always insert the stack
+ /// protector check code at the end of the BB.
+ ///
+ /// Given the aforementioned constraints, the following solution was devised:
+ ///
+ /// 1. On platforms that do not support SelectionDAG stack protector check
+ /// generation, allow for the normal IR level stack protector check
+ /// generation to continue.
+ ///
+ /// 2. On platforms that do support SelectionDAG stack protector check
+ /// generation:
+ ///
+ /// a. Use the IR level stack protector pass to decide if a stack
+ /// protector is required/which BB we insert the stack protector check
+ /// in by reusing the logic already therein. If we wish to generate a
+ /// stack protector check in a basic block, we place a special IR
+ /// intrinsic called llvm.stackprotectorcheck right before the BB's
+ /// returninst or if there is a callinst that could potentially be
+ /// sibling call optimized, before the call inst.
+ ///
+ /// b. Then when a BB with said intrinsic is processed, we codegen the BB
+ /// normally via SelectBasicBlock. In said process, when we visit the
+ /// stack protector check, we do not actually emit anything into the
+ /// BB. Instead, we just initialize the stack protector descriptor
+ /// class (which involves stashing information/creating the success
+  ///        mbb and the failure mbb if we have not created one for this
+ /// function yet) and export the guard variable that we are going to
+ /// compare.
+ ///
+ /// c. After we finish selecting the basic block, in FinishBasicBlock if
+ /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
+ /// initialized, we produce the validation code with one of these
+ /// techniques:
+ /// 1) with a call to a guard check function
+ /// 2) with inlined instrumentation
+ ///
+ /// 1) We insert a call to the check function before the terminator.
+ ///
+ /// 2) We first find a splice point in the parent basic block
+ /// before the terminator and then splice the terminator of said basic
+ /// block into the success basic block. Then we code-gen a new tail for
+ /// the parent basic block consisting of the two loads, the comparison,
+ /// and finally two branches to the success/failure basic blocks. We
+ /// conclude by code-gening the failure basic block if we have not
+ /// code-gened it already (all stack protector checks we generate in
+  ///        the same function use the same failure basic block).
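+  ///
+  /// As a rough sketch (using the member names of the class below), the
+  /// inlined instrumentation variant leaves the machine CFG looking
+  /// approximately like:
+  ///
+  ///   ParentMBB:              ; original terminator spliced out
+  ///     <load the stack guard and the on-stack canary>
+  ///     <compare them and branch to SuccessMBB or FailureMBB>
+  ///   SuccessMBB:             ; now ends with ParentMBB's old terminator
+  ///     ...
+  ///   FailureMBB:             ; shared by all checks in this function
+  ///     <call __stack_chk_fail>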
+ class StackProtectorDescriptor {
+ public:
+ StackProtectorDescriptor()
+ : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {}
+
+ /// Returns true if all fields of the stack protector descriptor are
+    /// initialized, implying that we are ready to emit a stack protector.
+ bool shouldEmitStackProtector() const {
+ return ParentMBB && SuccessMBB && FailureMBB;
+ }
+
+ bool shouldEmitFunctionBasedCheckStackProtector() const {
+ return ParentMBB && !SuccessMBB && !FailureMBB;
+ }
+
+ /// Initialize the stack protector descriptor structure for a new basic
+ /// block.
+ void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+ bool FunctionBasedInstrumentation) {
+ // Make sure we are not initialized yet.
+ assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+ "already initialized!");
+ ParentMBB = MBB;
+ if (!FunctionBasedInstrumentation) {
+ SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+ }
+ }
+
+ /// Reset state that changes when we handle different basic blocks.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. The specific basic block we are generating a
+ /// stack protector for (ParentMBB).
+ ///
+ /// 2. The successor machine basic block that will contain the tail of
+ /// parent mbb after we create the stack protector check (SuccessMBB). This
+ /// BB is visited only on stack protector check success.
+ void resetPerBBState() {
+ ParentMBB = nullptr;
+ SuccessMBB = nullptr;
+ }
+
+ /// Reset state that only changes when we switch functions.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. FailureMBB since we reuse the failure code path for all stack
+ /// protector checks created in an individual function.
+ ///
+    /// 2. The guard variable, since the guard variable we are checking against is
+ /// always the same.
+ void resetPerFunctionState() {
+ FailureMBB = nullptr;
+ }
+
+ MachineBasicBlock *getParentMBB() { return ParentMBB; }
+ MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+ MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+
+ private:
+ /// The basic block for which we are generating the stack protector.
+ ///
+ /// As a result of stack protector generation, we will splice the
+ /// terminators of this basic block into the successor mbb SuccessMBB and
+ /// replace it with a compare/branch to the successor mbbs
+ /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+ /// was violated.
+ MachineBasicBlock *ParentMBB;
+
+ /// A basic block visited on stack protector check success that contains the
+ /// terminators of ParentMBB.
+ MachineBasicBlock *SuccessMBB;
+
+    /// The basic block visited on stack protector check failure; it will
+    /// contain a call to __stack_chk_fail().
+ MachineBasicBlock *FailureMBB;
+
+ /// Add a successor machine basic block to ParentMBB. If the successor mbb
+    /// has not been created yet (i.e. if SuccMBB is null), then the machine basic
+ /// block will be created. Assign a large weight if IsLikely is true.
+ MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ bool IsLikely,
+ MachineBasicBlock *SuccMBB = nullptr);
+ };
+
+private:
+ const TargetMachine &TM;
+public:
+ /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
+ /// nodes without a corresponding SDNode.
+ static const unsigned LowestSDNodeOrder = 1;
+
+ SelectionDAG &DAG;
+ const DataLayout *DL;
+ AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
+
+ /// SwitchCases - Vector of CaseBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<CaseBlock> SwitchCases;
+ /// JTCases - Vector of JumpTable structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<JumpTableBlock> JTCases;
+ /// BitTestCases - Vector of BitTestBlock structures used to communicate
+ /// SwitchInst code generation information.
+ std::vector<BitTestBlock> BitTestCases;
+ /// A StackProtectorDescriptor structure used to communicate stack protector
+ /// information in between SelectBasicBlock and FinishBasicBlock.
+ StackProtectorDescriptor SPDescriptor;
+
+ // Emit PHI-node-operand constants only once even if used by multiple
+ // PHI nodes.
+ DenseMap<const Constant *, unsigned> ConstantsOut;
+
+ /// FuncInfo - Information about the function as a whole.
+ ///
+ FunctionLoweringInfo &FuncInfo;
+
+ /// GFI - Garbage collection metadata for the function.
+ GCFunctionInfo *GFI;
+
+ /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+
+ /// HasTailCall - This is set to true if a call in the current
+ /// block has been translated as a tail call. In this case,
+ /// no subsequent DAG nodes should be created.
+ ///
+ bool HasTailCall;
+
+ LLVMContext *Context;
+
+ SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+ CodeGenOpt::Level ol)
+ : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
+ DAG(dag), FuncInfo(funcinfo),
+ HasTailCall(false) {
+ }
+
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
+
+ /// clear - Clear out the current SelectionDAG and the associated
+ /// state and prepare this SelectionDAGBuilder object to be used
+ /// for a new block. This doesn't clear out information about
+ /// additional blocks that are needed to complete switch lowering
+ /// or PHI node updating; that information is cleared out as it is
+ /// consumed.
+ void clear();
+
+ /// clearDanglingDebugInfo - Clear the dangling debug information
+ /// map. This function is separated from the clear so that debug
+ /// information that is dangling in a basic block can be properly
+ /// resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached
+ /// to PHI nodes.
+ void clearDanglingDebugInfo();
+
+ /// getRoot - Return the current virtual root of the Selection DAG,
+ /// flushing any PendingLoad items. This must be done before emitting
+ /// a store or any other node that may need to be ordered after any
+ /// prior load instructions.
+ ///
+ SDValue getRoot();
+
+ /// getControlRoot - Similar to getRoot, but instead of flushing all the
+ /// PendingLoad items, flush all the PendingExports items. It is necessary
+ /// to do this before emitting a terminator instruction.
+ ///
+ SDValue getControlRoot();
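+
+  // A minimal usage sketch (Val and Ptr are hypothetical SDValues): lowering a
+  // store flushes PendingLoads via getRoot() so the store is ordered after any
+  // prior loads, whereas a terminator would use getControlRoot() instead.
+  //
+  //   SDValue Chain = getRoot();
+  //   SDValue St = DAG.getStore(Chain, getCurSDLoc(), Val, Ptr,
+  //                             MachinePointerInfo());
+  //   DAG.setRoot(St);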
+
+ SDLoc getCurSDLoc() const {
+ return SDLoc(CurInst, SDNodeOrder);
+ }
+
+ DebugLoc getCurDebugLoc() const {
+ return CurInst ? CurInst->getDebugLoc() : DebugLoc();
+ }
+
+ unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+ void visit(const Instruction &I);
+
+ void visit(unsigned Opcode, const User &I);
+
+  /// getCopyFromRegs - If there was a virtual register allocated for the value
+  /// V, emit a CopyFromReg of the specified type Ty. Return an empty SDValue()
+  /// otherwise.
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ SDValue getValue(const Value *V);
+ bool findValue(const Value *V) const;
+
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
+
+ void setValue(const Value *V, SDValue NewN) {
+ SDValue &N = NodeMap[V];
+ assert(!N.getNode() && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(!N.getNode() && "Already set a value for this node!");
+ N = NewN;
+ }
+
+ void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TW,
+ BranchProbability FW);
+ void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB,
+ MachineBasicBlock *SwitchBB,
+ BranchProbability TW, BranchProbability FW);
+ bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+ void CopyToExportRegsIfNeeded(const Value *V);
+ void ExportFromCurrentBlock(const Value *V);
+ void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+ const BasicBlock *EHPadBB = nullptr);
+
+  // Lower range metadata from 0 to N into an assert zext to an integer of the
+  // nearest floor power of two.
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
+ SDValue Op);
+
+ void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
+ ImmutableCallSite CS, unsigned ArgIdx,
+ unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, bool IsPatchPoint);
+
+ std::pair<SDValue, SDValue>
+ lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
+ const BasicBlock *EHPadBB = nullptr);
+
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+  /// Describes a gc.statepoint or a gc.statepoint-like call for the purposes
+ /// of lowering into a STATEPOINT node.
+ struct StatepointLoweringInfo {
+ /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set
+ /// of gc pointers this STATEPOINT has to relocate.
+ SmallVector<const Value *, 16> Bases;
+ SmallVector<const Value *, 16> Ptrs;
+
+ /// The set of gc.relocate calls associated with this gc.statepoint.
+ SmallVector<const GCRelocateInst *, 16> GCRelocates;
+
+ /// The full list of gc arguments to the gc.statepoint being lowered.
+ ArrayRef<const Use> GCArgs;
+
+ /// The gc.statepoint instruction.
+ const Instruction *StatepointInstr = nullptr;
+
+ /// The list of gc transition arguments present in the gc.statepoint being
+ /// lowered.
+ ArrayRef<const Use> GCTransitionArgs;
+
+ /// The ID that the resulting STATEPOINT instruction has to report.
+ unsigned ID = -1;
+
+ /// Information regarding the underlying call instruction.
+ TargetLowering::CallLoweringInfo CLI;
+
+ /// The deoptimization state associated with this gc.statepoint call, if
+ /// any.
+ ArrayRef<const Use> DeoptState;
+
+ /// Flags associated with the meta arguments being lowered.
+ uint64_t StatepointFlags = -1;
+
+ /// The number of patchable bytes the call needs to get lowered into.
+ unsigned NumPatchBytes = -1;
+
+ /// The exception handling unwind destination, in case this represents an
+ /// invoke of gc.statepoint.
+ const BasicBlock *EHPadBB = nullptr;
+
+ explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {}
+ };
+
+ /// Lower \p SLI into a STATEPOINT instruction.
+ SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI);
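+
+  // A rough sketch of how a caller might drive this (BaseV, DerivedV and
+  // StatepointID are hypothetical; the call information in SLI.CLI and the
+  // statepoint flags would also need to be filled in):
+  //
+  //   StatepointLoweringInfo SLI(DAG);
+  //   SLI.Bases.push_back(BaseV);
+  //   SLI.Ptrs.push_back(DerivedV);
+  //   SLI.ID = StatepointID;
+  //   SLI.NumPatchBytes = 0;
+  //   SDValue ActualCallResult = LowerAsSTATEPOINT(SLI);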
+
+ // This function is responsible for the whole statepoint lowering process.
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint Statepoint,
+ const BasicBlock *EHPadBB = nullptr);
+
+ void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
+ const BasicBlock *EHPadBB);
+
+ void LowerDeoptimizeCall(const CallInst *CI);
+ void LowerDeoptimizingReturn();
+
+ void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee,
+ const BasicBlock *EHPadBB,
+ bool VarArgDisallowed,
+ bool ForceVoidReturnTy);
+
+private:
+ // Terminator instructions.
+ void visitRet(const ReturnInst &I);
+ void visitBr(const BranchInst &I);
+ void visitSwitch(const SwitchInst &I);
+ void visitIndirectBr(const IndirectBrInst &I);
+ void visitUnreachable(const UnreachableInst &I);
+ void visitCleanupRet(const CleanupReturnInst &I);
+ void visitCatchSwitch(const CatchSwitchInst &I);
+ void visitCatchRet(const CatchReturnInst &I);
+ void visitCatchPad(const CatchPadInst &I);
+ void visitCleanupPad(const CleanupPadInst &CPI);
+
+ BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+ void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown());
+
+public:
+ void visitSwitchCase(CaseBlock &CB,
+ MachineBasicBlock *SwitchBB);
+ void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+ void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
+ void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
+ BranchProbability BranchProbToNext,
+ unsigned Reg,
+ BitTestCase &B,
+ MachineBasicBlock *SwitchBB);
+ void visitJumpTable(JumpTable &JT);
+ void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ MachineBasicBlock *SwitchBB);
+
+private:
+ // These all get lowered before this pass.
+ void visitInvoke(const InvokeInst &I);
+ void visitResume(const ResumeInst &I);
+
+ void visitBinary(const User &I, unsigned OpCode);
+ void visitShift(const User &I, unsigned Opcode);
+ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
+ void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+ void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
+ void visitFSub(const User &I);
+ void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
+ void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+ void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+ void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+ void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+ void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+ void visitSDiv(const User &I);
+ void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+ void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+ void visitOr (const User &I) { visitBinary(I, ISD::OR); }
+ void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+ void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+ void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+ void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+ void visitICmp(const User &I);
+ void visitFCmp(const User &I);
+ // Visit the conversion instructions
+ void visitTrunc(const User &I);
+ void visitZExt(const User &I);
+ void visitSExt(const User &I);
+ void visitFPTrunc(const User &I);
+ void visitFPExt(const User &I);
+ void visitFPToUI(const User &I);
+ void visitFPToSI(const User &I);
+ void visitUIToFP(const User &I);
+ void visitSIToFP(const User &I);
+ void visitPtrToInt(const User &I);
+ void visitIntToPtr(const User &I);
+ void visitBitCast(const User &I);
+ void visitAddrSpaceCast(const User &I);
+
+ void visitExtractElement(const User &I);
+ void visitInsertElement(const User &I);
+ void visitShuffleVector(const User &I);
+
+ void visitExtractValue(const ExtractValueInst &I);
+ void visitInsertValue(const InsertValueInst &I);
+ void visitLandingPad(const LandingPadInst &I);
+
+ void visitGetElementPtr(const User &I);
+ void visitSelect(const User &I);
+
+ void visitAlloca(const AllocaInst &I);
+ void visitLoad(const LoadInst &I);
+ void visitStore(const StoreInst &I);
+ void visitMaskedLoad(const CallInst &I);
+ void visitMaskedStore(const CallInst &I);
+ void visitMaskedGather(const CallInst &I);
+ void visitMaskedScatter(const CallInst &I);
+ void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
+ void visitAtomicRMW(const AtomicRMWInst &I);
+ void visitFence(const FenceInst &I);
+ void visitPHI(const PHINode &I);
+ void visitCall(const CallInst &I);
+ bool visitMemCmpCall(const CallInst &I);
+ bool visitMemChrCall(const CallInst &I);
+ bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
+ bool visitStrCmpCall(const CallInst &I);
+ bool visitStrLenCall(const CallInst &I);
+ bool visitStrNLenCall(const CallInst &I);
+ bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+ bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
+ void visitAtomicLoad(const LoadInst &I);
+ void visitAtomicStore(const StoreInst &I);
+ void visitLoadFromSwiftError(const LoadInst &I);
+ void visitStoreToSwiftError(const StoreInst &I);
+
+ void visitInlineAsm(ImmutableCallSite CS);
+ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+ void visitVAStart(const CallInst &I);
+ void visitVAArg(const VAArgInst &I);
+ void visitVAEnd(const CallInst &I);
+ void visitVACopy(const CallInst &I);
+ void visitStackmap(const CallInst &I);
+ void visitPatchpoint(ImmutableCallSite CS,
+ const BasicBlock *EHPadBB = nullptr);
+
+ // These two are implemented in StatepointLowering.cpp
+ void visitGCRelocate(const GCRelocateInst &I);
+ void visitGCResult(const GCResultInst &I);
+
+ void visitUserOp1(const Instruction &I) {
+ llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+ }
+ void visitUserOp2(const Instruction &I) {
+ llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+ }
+
+ void processIntegerCallValue(const Instruction &I,
+ SDValue Value, bool IsSigned);
+
+ void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+ void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
+
+  /// EmitFuncArgumentDbgValue - If V is a function argument then create the
+  /// corresponding DBG_VALUE machine instruction for it now. At the end of
+  /// instruction selection, these will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
+ DIExpression *Expr, DILocation *DL,
+ int64_t Offset, bool IsIndirect,
+ const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
+
+ /// Update the DAG and DAG builder with the relevant information after
+ /// a new root node has been created which could be a tail call.
+ void updateDAGForMaybeTailCall(SDValue MaybeTC);
+};
+
+/// RegsForValue - This struct represents the registers (physical or virtual)
+/// that a particular set of values is assigned, and the type information about
+/// the value. The most common situation is to represent one value at a time,
+/// but struct or array values are handled element-wise as multiple values. The
+/// splitting of aggregates is performed recursively, so that we never have
+/// aggregate-typed registers. The values at this point do not necessarily have
+/// legal types, so each value may require one or more registers of some legal
+/// type.
+///
+struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+  /// may need to be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+  /// With virtual registers, the contents of RegVTs are redundant with TLI's
+  /// getRegisterType member function. However, with physical registers it is
+  /// necessary to have a separate record of the types.
+ ///
+ SmallVector<MVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue();
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+ const DataLayout &DL, unsigned Reg, Type *Ty);
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+  /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from this
+  /// value and return the result as a ValueVTs value. This uses Chain/Flag as
+  /// the input and updates them for the output Chain/Flag. If the Flag pointer
+  /// is nullptr, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const Value *V = nullptr) const;
+
+  /// getCopyToRegs - Emit a series of CopyToReg nodes that copy the specified
+ /// value into the registers specified by this object. This uses Chain/Flag
+ /// as the input and updates them for the output Chain/Flag. If the Flag
+ /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
+ /// in printing better diagnostic messages on error.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
+ SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
+ ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
+ unsigned MatchingIdx, const SDLoc &dl,
+ SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
+};
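+
+// A minimal usage sketch (Op, V, Reg, dl, DAG and TLI come from the
+// surrounding lowering code and are hypothetical here): copy an
+// already-lowered value Op into the virtual registers assigned to V.
+//
+//   RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+//                    V->getType());
+//   SDValue Chain = DAG.getEntryNode();
+//   RFV.getCopyToRegs(Op, DAG, dl, Chain, nullptr, V);
+//   // Chain now carries the CopyToReg nodes and must be kept live, for
+//   // example by making it (part of) the DAG root or a pending export.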
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..401da059dedc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,719 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+static cl::opt<bool>
+VerboseDAGDumping("dag-dump-verbose", cl::Hidden,
+ cl::desc("Display more information when dumping selection "
+ "DAG nodes."));
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueConstant";
+ return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
+ case ISD::READ_REGISTER: return "READ_REGISTER";
+ case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::TargetIndex: return "TargetIndex";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant:
+ if (cast<ConstantSDNode>(this)->isOpaque())
+ return "OpaqueTargetConstant";
+ return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::MCSymbol: return "MCSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FMINNUM: return "fminnum";
+ case ISD::FMAXNUM: return "fmaxnum";
+ case ISD::FMINNAN: return "fminnan";
+ case ISD::FMAXNAN: return "fmaxnan";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FSINCOS: return "fsincos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FROUND: return "fround";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FMAD: return "fmad";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FCANONICALIZE: return "fcanonicalize";
+ case ISD::FPOW: return "fpow";
+ case ISD::SMIN: return "smin";
+ case ISD::SMAX: return "smax";
+ case ISD::UMIN: return "umin";
+ case ISD::UMAX: return "umax";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SETCCE: return "setcce";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::ADDRSPACECAST: return "addrspacecast";
+ case ISD::FP16_TO_FP: return "fp16_to_fp";
+ case ISD::FP_TO_FP16: return "fp_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // EH instructions
+ case ISD::CATCHRET: return "catchret";
+ case ISD::CLEANUPRET: return "cleanupret";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::MLOAD: return "masked_load";
+ case ISD::MSTORE: return "masked_store";
+ case ISD::MGATHER: return "masked_gather";
+ case ISD::MSCATTER: return "masked_scatter";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+ case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::LIFETIME_START: return "lifetime.start";
+ case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::GC_TRANSITION_START: return "gc_transition.start";
+ case ISD::GC_TRANSITION_END: return "gc_transition.end";
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
+
+ // Bit manipulation
+ case ISD::BITREVERSE: return "bitreverse";
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setueq";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+static Printable PrintNodeId(const SDNode &Node) {
+ return Printable([&Node](raw_ostream &OS) {
+#ifndef NDEBUG
+ OS << 't' << Node.PersistentId;
+#else
+ OS << (const void*)&Node;
+#endif
+ });
+}
+
+LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (std::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ GADN->getGlobal()->printAsOperand(OS);
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) {
+ OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">";
+ if (unsigned TF = TI->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(),
+ G ? G->getSubtarget().getRegisterInfo() : nullptr);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ int64_t offset = BA->getOffset();
+ OS << "<";
+ BA->getBlockAddress()->getFunction()->printAsOperand(OS, false);
+ OS << ", ";
+ BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false);
+ OS << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const AddrSpaceCastSDNode *ASC =
+ dyn_cast<AddrSpaceCastSDNode>(this)) {
+ OS << '['
+ << ASC->getSrcAddressSpace()
+ << " -> "
+ << ASC->getDestAddressSpace()
+ << ']';
+ }
+
+ if (VerboseDAGDumping) {
+ if (unsigned Order = getIROrder())
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ if (!G)
+ return;
+
+ DILocation *L = getDebugLoc();
+ if (!L)
+ return;
+
+ if (auto *Scope = L->getScope())
+ OS << Scope->getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << L->getLine();
+ if (unsigned C = L->getColumn())
+ OS << ':' << C;
+ }
+}
+
+/// Return true if this node is so simple that we should just print it inline
+/// if it appears as an operand.
+static bool shouldPrintInline(const SDNode &Node) {
+ if (Node.getOpcode() == ISD::EntryToken)
+ return false;
+ return Node.getNumOperands() == 0;
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (const SDValue &Op : N->op_values()) {
+ if (shouldPrintInline(*Op.getNode()))
+ continue;
+ if (Op.getNode()->hasOneUse())
+ DumpNodes(Op.getNode(), indent+2, G);
+ }
+
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+LLVM_DUMP_METHOD void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = &*I;
+ if (!N->hasOneUse() && N != getRoot().getNode() &&
+ (!shouldPrintInline(*N) || N->use_empty()))
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
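+
+// Illustrative usage (editor's sketch, not part of the imported source): these
+// dump helpers are normally driven from DEBUG() blocks while bringing up a
+// target, for example:
+//
+//   DEBUG(CurDAG->dump());               // print the whole DAG for the block
+//   DEBUG(SomeNode->dumprFull(CurDAG));  // one node plus operands, depth-capped
+//
+// 'CurDAG' and 'SomeNode' stand for whatever SelectionDAG/SDNode the caller has
+// in scope; DEBUG() only fires in assert-enabled builds run with -debug.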
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << PrintNodeId(*this) << ": ";
+ print_types(OS, G);
+ OS << " = " << getOperationName(G);
+ print_details(OS, G);
+}
+
+static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
+ const SDValue Value) {
+ if (!Value.getNode()) {
+ OS << "<null>";
+ return false;
+ } else if (shouldPrintInline(*Value.getNode())) {
+ OS << Value->getOperationName(G) << ':';
+ Value->print_types(OS, G);
+ Value->print_details(OS, G);
+ return true;
+ } else {
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
+ }
+}
+
+typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N).second) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << " ";
+
+ const SDValue Op = N->getOperand(i);
+ bool printedInline = printOperand(OS, G, Op);
+ if (printedInline)
+ once.insert(Op.getNode());
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (const SDValue &Op : N->op_values())
+ DumpNodesr(OS, Op.getNode(), indent+2, G, once);
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, nullptr, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (const SDValue &Op : N->op_values()) {
+ // Don't follow chain operands.
+ if (Op.getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ printr(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ printOperand(OS, G, getOperand(i));
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 000000000000..1d61657194c5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,3591 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "isel"
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+ "Number of entry blocks where fast isel failed to lower arguments");
+
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+
+// Intrinsic instructions...
+STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
+STATISTIC(NumFastIselFailSAddWithOverflow,
+ "Fast isel fails on sadd.with.overflow");
+STATISTIC(NumFastIselFailUAddWithOverflow,
+ "Fast isel fails on uadd.with.overflow");
+STATISTIC(NumFastIselFailSSubWithOverflow,
+ "Fast isel fails on ssub.with.overflow");
+STATISTIC(NumFastIselFailUSubWithOverflow,
+ "Fast isel fails on usub.with.overflow");
+STATISTIC(NumFastIselFailSMulWithOverflow,
+ "Fast isel fails on smul.with.overflow");
+STATISTIC(NumFastIselFailUMulWithOverflow,
+ "Fast isel fails on umul.with.overflow");
+STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
+STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
+STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
+STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
+#endif
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+ cl::desc("Enable verbose messages in the \"fast\" "
+ "instruction selector"));
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disable the abort, 1 will "
+ "abort but for args, calls and terminators, 2 will also "
+ "abort for argument lowering, and 3 will never fallback "
+ "to SelectionDAG."));
+
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
+#ifndef NDEBUG
+static cl::opt<std::string>
+FilterDAGBasicBlockName("filter-view-dags", cl::Hidden,
+ cl::desc("Only display the basic block whose name "
+ "matches this for all view-*-dags options"));
+static cl::opt<bool>
+ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the first "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize types"));
+static cl::opt<bool>
+ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
+static cl::opt<bool>
+ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the second "
+ "dag combine pass"));
+static cl::opt<bool>
+ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post legalize types"
+ " dag combine pass"));
+static cl::opt<bool>
+ViewISelDAGs("view-isel-dags", cl::Hidden,
+ cl::desc("Pop up a window to show isel dags as they are selected"));
+static cl::opt<bool>
+ViewSchedDAGs("view-sched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show sched dags as they are processed"));
+static cl::opt<bool>
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
+ cl::desc("Pop up a window to show SUnit dags after they are processed"));
+#else
+static const bool ViewDAGCombine1 = false,
+ ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false,
+ ViewDAGCombineLT = false,
+ ViewISelDAGs = false, ViewSchedDAGs = false,
+ ViewSUnitDAGs = false;
+#endif
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterScheduler::Registry;
+
+//===---------------------------------------------------------------------===//
+///
+/// ISHeuristic command line option for instruction schedulers.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterScheduler::FunctionPassCtor, false,
+ RegisterPassParser<RegisterScheduler> >
+ISHeuristic("pre-RA-sched",
+ cl::init(&createDefaultScheduler), cl::Hidden,
+ cl::desc("Instruction schedulers available (before register"
+ " allocation):"));
+
+static RegisterScheduler
+defaultListDAGScheduler("default", "Best scheduler for the target",
+ createDefaultScheduler);
+
+namespace llvm {
+ //===--------------------------------------------------------------------===//
+ /// \brief This class is used by SelectionDAGISel to temporarily override
+ /// the optimization level on a per-function basis.
+ class OptLevelChanger {
+ SelectionDAGISel &IS;
+ CodeGenOpt::Level SavedOptLevel;
+ bool SavedFastISel;
+
+ public:
+ OptLevelChanger(SelectionDAGISel &ISel,
+ CodeGenOpt::Level NewOptLevel) : IS(ISel) {
+ SavedOptLevel = IS.OptLevel;
+ if (NewOptLevel == SavedOptLevel)
+ return;
+ IS.OptLevel = NewOptLevel;
+ IS.TM.setOptLevel(NewOptLevel);
+ DEBUG(dbgs() << "\nChanging optimization level for Function "
+ << IS.MF->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
+ << " ; After: -O" << NewOptLevel << "\n");
+ SavedFastISel = IS.TM.Options.EnableFastISel;
+ if (NewOptLevel == CodeGenOpt::None) {
+ IS.TM.setFastISel(IS.TM.getO0WantsFastISel());
+ DEBUG(dbgs() << "\tFastISel is "
+ << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled")
+ << "\n");
+ }
+ }
+
+ ~OptLevelChanger() {
+ if (IS.OptLevel == SavedOptLevel)
+ return;
+ DEBUG(dbgs() << "\nRestoring optimization level for Function "
+ << IS.MF->getFunction()->getName() << "\n");
+ DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
+ << " ; After: -O" << SavedOptLevel << "\n");
+ IS.OptLevel = SavedOptLevel;
+ IS.TM.setOptLevel(SavedOptLevel);
+ IS.TM.setFastISel(SavedFastISel);
+ }
+ };
+
+ //===--------------------------------------------------------------------===//
+ /// createDefaultScheduler - This creates an instruction scheduler appropriate
+ /// for the target.
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
+ const TargetLowering *TLI = IS->TLI;
+ const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
+
+ // Try first to see if the Target has its own way of selecting a scheduler
+ if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) {
+ return SchedulerCtor(IS, OptLevel);
+ }
+
+ if (OptLevel == CodeGenOpt::None ||
+ (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
+ TLI->getSchedulingPreference() == Sched::Source)
+ return createSourceListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI->getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
+ assert(TLI->getSchedulingPreference() == Sched::ILP &&
+ "Unknown sched type!");
+ return createILPListDAGScheduler(IS, OptLevel);
+ }
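+
+  // Editor's note (illustrative, not from the commit): targets usually steer
+  // this choice from their TargetLowering constructor, e.g.
+  //
+  //   setSchedulingPreference(Sched::RegPressure);  // picks the BURR list scheduler above
+  //
+  // or, less commonly, by returning their own ScheduleDAGSDNodes factory from
+  // the subtarget's getDAGScheduler() hook checked first in this function.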
+} // end namespace llvm
+
+// EmitInstrWithCustomInserter - This method should be implemented by targets
+// that mark instructions with the 'usesCustomInserter' flag. These
+// instructions are special in various ways, which require special support to
+// insert. The specified MachineInstr is created but not inserted into any
+// basic blocks, and this method is called to expand it into a sequence of
+// instructions, potentially also creating new basic blocks and control flow.
+// When new basic blocks are inserted and the edges from MBB to its successors
+// are modified, the method should insert pairs of <OldSucc, NewSucc> into the
+// DenseMap.
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+ dbgs() << "If a target marks an instruction with "
+ "'usesCustomInserter', it must implement "
+ "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+ llvm_unreachable(nullptr);
+}
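+
+// Editor's sketch of the override the message above asks for (hypothetical
+// target; FooTargetLowering, Foo::SELECT_PSEUDO and emitSelectPseudo are
+// made-up names, not part of the imported source):
+//
+//   MachineBasicBlock *
+//   FooTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+//                                                  MachineBasicBlock *MBB) const {
+//     switch (MI.getOpcode()) {
+//     case Foo::SELECT_PSEUDO:
+//       // May split MBB and create new control flow for the select lowering.
+//       return emitSelectPseudo(MI, MBB);
+//     default:
+//       llvm_unreachable("unexpected custom-inserter instruction");
+//     }
+//   }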
+
+void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+ SDNode *Node) const {
+ assert(!MI.hasPostISelHook() &&
+ "If a target marks an instruction with 'hasPostISelHook', "
+ "it must implement TargetLowering::AdjustInstrPostInstrSelection!");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
+ CodeGenOpt::Level OL) :
+ MachineFunctionPass(ID), TM(tm),
+ FuncInfo(new FunctionLoweringInfo()),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ GFI(),
+ OptLevel(OL),
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+SelectionDAGISel::~SelectionDAGISel() {
+ delete SDB;
+ delete CurDAG;
+ delete FuncInfo;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<GCModuleInfo>();
+ AU.addRequired<StackProtector>();
+ AU.addPreserved<StackProtector>();
+ AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI incoming
+/// value that may trap. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn) {
+ // Loop for blocks with phi nodes.
+ for (BasicBlock &BB : Fn) {
+ PHINode *PN = dyn_cast<PHINode>(BB.begin());
+ if (!PN) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (!CE || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(
+ Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
+ CriticalEdgeSplittingOptions().setMergeIdenticalEdges());
+ goto ReprocessBlock;
+ }
+ }
+}
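+
+// Worked example (editor's illustration, not from the commit): an incoming PHI
+// value such as
+//
+//   %p = phi i32 [ sdiv (i32 1, i32 ptrtoint (i32* @g to i32)), %pred ], ...
+//
+// is a constant expression that canTrap(); if %pred has multiple successors,
+// the edge %pred -> phi block is split so the division is only materialized on
+// the path that actually reaches the PHI. @g, %p and %pred are placeholder
+// names for the sketch.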
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // Do some sanity-checking on the command-line options.
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
+ "-fast-isel-verbose requires -fast-isel");
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
+ "-fast-isel-abort > 0 requires -fast-isel");
+
+ const Function &Fn = *mf.getFunction();
+ MF = &mf;
+
+ // Reset the target options before resetting the optimization
+ // level below.
+ // FIXME: This is a horrible hack and should be processed via
+ // codegen looking at the optimization level explicitly when
+ // it wants to look at it.
+ TM.resetTargetOptions(Fn);
+ // Reset OptLevel to None for optnone functions.
+ CodeGenOpt::Level NewOptLevel = OptLevel;
+ if (OptLevel != CodeGenOpt::None && skipFunction(Fn))
+ NewOptLevel = CodeGenOpt::None;
+ OptLevelChanger OLC(*this, NewOptLevel);
+
+ TII = MF->getSubtarget().getInstrInfo();
+ TLI = MF->getSubtarget().getTargetLowering();
+ RegInfo = &MF->getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+
+ DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+ SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
+
+ CurDAG->init(*MF);
+ FuncInfo->set(Fn, *MF, CurDAG);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ else
+ FuncInfo->BPI = nullptr;
+
+ SDB->init(GFI, *AA, LibInfo);
+
+ MF->setHasInlineAsm(false);
+
+ FuncInfo->SplitCSR = false;
+
+ // We split CSR if the target supports it for the given function
+ // and the function has only return exits.
+ if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) {
+ FuncInfo->SplitCSR = true;
+
+ // Collect all the return blocks.
+ for (const BasicBlock &BB : Fn) {
+ if (!succ_empty(&BB))
+ continue;
+
+ const TerminatorInst *Term = BB.getTerminator();
+ if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term))
+ continue;
+
+ // Bail out if the exit block is not Return nor Unreachable.
+ FuncInfo->SplitCSR = false;
+ break;
+ }
+ }
+
+ MachineBasicBlock *EntryMBB = &MF->front();
+ if (FuncInfo->SplitCSR)
+ // This performs initialization so lowering for SplitCSR will be correct.
+ TLI->initializeSplitCSR(EntryMBB);
+
+ SelectAllBasicBlocks(Fn);
+
+ // If the first basic block in the function has live ins that need to be
+ // copied into vregs, emit the copies into the top of the block before
+ // emitting the code for the block.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII);
+
+ // Insert copies in the entry block and the return blocks.
+ if (FuncInfo->SplitCSR) {
+ SmallVector<MachineBasicBlock*, 4> Returns;
+ // Collect all the return blocks.
+ for (MachineBasicBlock &MBB : mf) {
+ if (!MBB.succ_empty())
+ continue;
+
+ MachineBasicBlock::iterator Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && Term->isReturn()) {
+ Returns.push_back(&MBB);
+ continue;
+ }
+ }
+ TLI->insertCopiesSplitCSR(EntryMBB, Returns);
+ }
+
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+ // Insert DBG_VALUE instructions for function arguments to the entry block.
+ for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+ MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+ bool hasFI = MI->getOperand(0).isFI();
+ unsigned Reg =
+ hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ EntryMBB->insert(EntryMBB->begin(), MI);
+ else {
+ MachineInstr *Def = RegInfo->getVRegDef(Reg);
+ if (Def) {
+ MachineBasicBlock::iterator InsertPos = Def;
+ // FIXME: VR def may not be in entry block.
+ Def->getParent()->insert(std::next(InsertPos), MI);
+ } else
+        DEBUG(dbgs() << "Dropping debug info for dead vreg "
+ << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
+ }
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ assert(!hasFI && "There's no handling of frame pointer updating here yet "
+ "- add if needed");
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable = MI->getDebugVariable();
+ const MDNode *Expr = MI->getDebugExpression();
+ DebugLoc DL = MI->getDebugLoc();
+ bool IsIndirect = MI->isIndirectDebugValue();
+ unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
+ IsIndirect, LDI->second, Offset, Variable, Expr);
+
+ // If this vreg is directly copied into an exported register then
+      // that COPY instruction also needs a DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = nullptr;
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = RegInfo->use_instr_begin(LDI->second),
+ E = RegInfo->use_instr_end(); UI != E; ) {
+ MachineInstr *UseMI = &*(UI++);
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = nullptr; break;
+ }
+ if (CopyUseMI) {
+ // Use MI's debug location, which describes where Variable was
+ // declared, rather than whatever is attached to CopyUseMI.
+ MachineInstr *NewMI =
+ BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
+ }
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ for (const auto &MBB : *MF) {
+ if (MFI->hasCalls() && MF->hasInlineAsm())
+ break;
+
+ for (const auto &MI : MBB) {
+ const MCInstrDesc &MCID = TII->get(MI.getOpcode());
+ if ((MCID.isCall() && !MCID.isReturn()) ||
+ MI.isStackAligningInlineAsm()) {
+ MFI->setHasCalls(true);
+ }
+ if (MI.isInlineAsm()) {
+ MF->setHasInlineAsm(true);
+ }
+ }
+ }
+
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Make sure the new register has a sufficiently constrained register class.
+ if (TargetRegisterInfo::isVirtualRegister(From) &&
+ TargetRegisterInfo::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
+    // Replacing one register with another won't touch the kill flags.
+    // We need to conservatively clear the kill flags as a kill on the old
+    // register might dominate existing uses of the new register.
+    if (!MRI.use_empty(To))
+      MRI.clearKillFlags(From);
+
+    // Replace it.
+    MRI.replaceRegWith(From, To);
+ }
+
+ if (TLI->hasCopyImplyingStackAdjustment(MF))
+ MFI->setHasCopyImplyingStackAdjustment(true);
+
+ // Freeze the set of reserved registers now that MachineFrameInfo has been
+ // set up. All the information required by getReservedRegs() should be
+ // available now.
+ MRI.freezeReservedRegs(*MF);
+
+ // Release function-specific state. SDB and CurDAG are already cleared
+ // at this point.
+ FuncInfo->clear();
+
+ DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+ DEBUG(MF->print(dbgs()));
+
+ return true;
+}
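+
+// Editor's sketch (not part of the imported source) of the fixup chasing in the
+// RegFixups loop above, spelled out on a plain map from old to new vregs:
+//
+//   unsigned To = Fixups.lookup(From);
+//   while (unsigned Next = Fixups.lookup(To))  // lookup() returns 0 when absent
+//     To = Next;
+//   // ... then constrain the register class and call replaceRegWith(From, To).
+//
+// With fixups {1 -> 2, 2 -> 3}, From = 1 resolves to 3 before any replacement,
+// so From is rewritten directly to its ultimate replacement rather than to an
+// intermediate vreg that is itself about to be replaced.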
+
+void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End,
+ bool &HadTailCall) {
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+ SDB->visit(*I);
+
+ // Make sure the root of the DAG is up-to-date.
+ CurDAG->setRoot(SDB->getControlRoot());
+ HadTailCall = SDB->HasTailCall;
+ SDB->clear();
+
+ // Final step, emit the lowered DAG as machine code.
+ CodeGenAndEmitDAG();
+}
+
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+ SmallPtrSet<SDNode*, 16> VisitedNodes;
+ SmallVector<SDNode*, 128> Worklist;
+
+ Worklist.push_back(CurDAG->getRoot().getNode());
+
+ APInt KnownZero;
+ APInt KnownOne;
+
+ do {
+ SDNode *N = Worklist.pop_back_val();
+
+ // If we've already seen this node, ignore it.
+ if (!VisitedNodes.insert(N).second)
+ continue;
+
+ // Otherwise, add all chain operands to the worklist.
+ for (const SDValue &Op : N->op_values())
+ if (Op.getValueType() == MVT::Other)
+ Worklist.push_back(Op.getNode());
+
+ // If this is a CopyToReg with a vreg dest, process it.
+ if (N->getOpcode() != ISD::CopyToReg)
+ continue;
+
+ unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // Ignore non-scalar or non-integer values.
+ SDValue Src = N->getOperand(2);
+ EVT SrcVT = Src.getValueType();
+ if (!SrcVT.isInteger() || SrcVT.isVector())
+ continue;
+
+ unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+ CurDAG->computeKnownBits(Src, KnownZero, KnownOne);
+ FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+ } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+ std::string GroupName;
+ if (TimePassesIsEnabled)
+ GroupName = "Instruction Selection and Scheduling";
+ std::string BlockName;
+ int BlockNumber = -1;
+ (void)BlockNumber;
+ bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+ MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
+ FilterDAGBasicBlockName ==
+ FuncInfo->MBB->getBasicBlock()->getName().str());
+#endif
+#ifdef NDEBUG
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+ ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ ViewSUnitDAGs)
+#endif
+ {
+ BlockNumber = FuncInfo->MBB->getNumber();
+ BlockName =
+ (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
+ }
+ DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine1 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+ // Run the DAG combiner in pre-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ // Second step, hack on the DAG until it only uses operations and types that
+ // the target supports.
+ if (ViewLegalizeTypesDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize-types input for " + BlockName);
+
+ bool Changed;
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ CurDAG->NewNodesMustHaveLegalTypes = true;
+
+ if (Changed) {
+ if (ViewDAGCombineLT && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ }
+
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ Changed = CurDAG->LegalizeVectors();
+ }
+
+ if (Changed) {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ CurDAG->LegalizeTypes();
+ }
+
+ if (ViewDAGCombineLT && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
+ << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ }
+
+ if (ViewLegalizeDAGs && MatchFilterBB)
+ CurDAG->viewGraph("legalize input for " + BlockName);
+
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ CurDAG->Legalize();
+ }
+
+ DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewDAGCombine2 && MatchFilterBB)
+ CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+ // Run the DAG combiner in post-legalize mode.
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
+ }
+
+ DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (OptLevel != CodeGenOpt::None)
+ ComputeLiveOutVRegInfo();
+
+ if (ViewISelDAGs && MatchFilterBB)
+ CurDAG->viewGraph("isel input for " + BlockName);
+
+ // Third, instruction select all of the operations to machine code, adding the
+ // code to the MachineBasicBlock.
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ DoInstructionSelection();
+ }
+
+ DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
+ if (ViewSchedDAGs && MatchFilterBB)
+ CurDAG->viewGraph("scheduler input for " + BlockName);
+
+ // Schedule machine code.
+ ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
+ }
+
+ if (ViewSUnitDAGs && MatchFilterBB)
+ Scheduler->viewGraph();
+
+ // Emit machine code to BB. This can change 'BB' to the last block being
+ // inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
+ }
+
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+ // Free the scheduler state.
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
+ delete Scheduler;
+ }
+
+ // Free the SelectionDAG state, now that we're finished with it.
+ CurDAG->clear();
+}
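+
+// Editor's note (illustrative, not from the commit): the viewGraph() calls above
+// are gated by the -view-*-dags flags declared at the top of this file, e.g.
+//
+//   llc -view-isel-dags -filter-view-dags=entry foo.ll
+//
+// pops up only the instruction-selection input DAG for the block named "entry";
+// "entry" and foo.ll are placeholders, and the flags exist only in
+// assert-enabled builds.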
+
+namespace {
+/// ISelUpdater - helper class to handle updates of the instruction selection
+/// graph.
+class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ SelectionDAG::allnodes_iterator &ISelPosition;
+public:
+ ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
+ : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
+
+ /// NodeDeleted - Handle nodes deleted from the graph. If the node being
+ /// deleted is the current ISelPosition node, update ISelPosition.
+ ///
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+ ++ISelPosition;
+ }
+};
+} // end anonymous namespace
+
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(dbgs() << "===== Instruction selection begins: BB#"
+ << FuncInfo->MBB->getNumber()
+ << " '" << FuncInfo->MBB->getName() << "'\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+ // Create a dummy node (which is not added to allnodes), that adds
+ // a reference to the root node, preventing it from being deleted,
+ // and tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+ // Make sure that ISelPosition gets properly updated when nodes are deleted
+ // in calls made from this function.
+ ISelUpdater ISU(*CurDAG, ISelPosition);
+
+    // The AllNodes list is now topologically sorted. Visit the
+    // nodes by starting at the end of the list (the root of the
+    // graph) and proceeding back toward the beginning (the entry
+ // node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = &*--ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ Select(Node);
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+
+ DEBUG(dbgs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+}
+
+static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) {
+ for (const User *U : CPI->users()) {
+ if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) {
+ Intrinsic::ID IID = EHPtrCall->getIntrinsicID();
+ if (IID == Intrinsic::eh_exceptionpointer ||
+ IID == Intrinsic::eh_exceptioncode)
+ return true;
+ }
+ }
+ return false;
+}
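+
+// Illustrative IR (editor's example, not from the commit): the users this helper
+// looks for are calls such as
+//
+//   %cp  = catchpad within %cs [...]
+//   %ptr = call i8* @llvm.eh.exceptionpointer.p0i8(token %cp)
+//   %sel = call i32 @llvm.eh.exceptioncode(token %cp)
+//
+// where %cs, %cp, %ptr and %sel are placeholder names.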
+
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+bool SelectionDAGISel::PrepareEHLandingPad() {
+ MachineBasicBlock *MBB = FuncInfo->MBB;
+ const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn();
+ const BasicBlock *LLVMBB = MBB->getBasicBlock();
+ const TargetRegisterClass *PtrRC =
+ TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout()));
+
+ // Catchpads have one live-in register, which typically holds the exception
+ // pointer or code.
+ if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) {
+ if (hasExceptionPointerOrCodeUser(CPI)) {
+ // Get or create the virtual register to hold the pointer or code. Mark
+ // the live in physreg and copy into the vreg.
+ MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn);
+ assert(EHPhysReg && "target lacks exception pointer register");
+ MBB->addLiveIn(EHPhysReg);
+ unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::COPY), VReg)
+ .addReg(EHPhysReg, RegState::Kill);
+ }
+ return true;
+ }
+
+ if (!LLVMBB->isLandingPad())
+ return true;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+
+ // Assign the call site to the landing pad's begin label.
+ MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+
+ const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
+ BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
+
+ // Mark exception register as live in.
+ if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn))
+ FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
+
+ // Mark exception selector register as live in.
+ if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn))
+ FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC);
+
+ return true;
+}
+
+/// isFoldedOrDeadInstruction - Return true if the specified instruction is
+/// side-effect free and is either dead or folded into a generated instruction.
+/// Return false if it needs to be emitted.
+static bool isFoldedOrDeadInstruction(const Instruction *I,
+ FunctionLoweringInfo *FuncInfo) {
+ return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
+ !isa<TerminatorInst>(I) && // Terminators aren't folded.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
+ !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+}
+
+#ifndef NDEBUG
+// Collect per-instruction statistics for fast-isel misses. Only those
+// instructions that cause the bail are accounted for. It does not account for
+// instructions higher in the block. Thus, summing the per-instruction stats
+// will not add up to what is reported by NumFastIselFailures.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: {
+ if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ NumFastIselFailIntrinsicCall++; return;
+ case Intrinsic::sadd_with_overflow:
+ NumFastIselFailSAddWithOverflow++; return;
+ case Intrinsic::uadd_with_overflow:
+ NumFastIselFailUAddWithOverflow++; return;
+ case Intrinsic::ssub_with_overflow:
+ NumFastIselFailSSubWithOverflow++; return;
+ case Intrinsic::usub_with_overflow:
+ NumFastIselFailUSubWithOverflow++; return;
+ case Intrinsic::smul_with_overflow:
+ NumFastIselFailSMulWithOverflow++; return;
+ case Intrinsic::umul_with_overflow:
+ NumFastIselFailUMulWithOverflow++; return;
+ case Intrinsic::frameaddress:
+ NumFastIselFailFrameaddress++; return;
+ case Intrinsic::sqrt:
+ NumFastIselFailSqrt++; return;
+ case Intrinsic::experimental_stackmap:
+ NumFastIselFailStackMap++; return;
+ case Intrinsic::experimental_patchpoint_void: // fall-through
+ case Intrinsic::experimental_patchpoint_i64:
+ NumFastIselFailPatchPoint++; return;
+ }
+ }
+ NumFastIselFailCall++;
+ return;
+ }
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif // NDEBUG
+
+/// Set up SwiftErrorVals by going through the function. If the function has a
+/// swifterror argument, it will be the first entry.
+static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
+ FunctionLoweringInfo *FuncInfo) {
+ if (!TLI->supportSwiftError())
+ return;
+
+ FuncInfo->SwiftErrorVals.clear();
+ FuncInfo->SwiftErrorMap.clear();
+ FuncInfo->SwiftErrorWorklist.clear();
+
+ // Check if function has a swifterror argument.
+ for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
+ AI != AE; ++AI)
+ if (AI->hasSwiftErrorAttr())
+ FuncInfo->SwiftErrorVals.push_back(&*AI);
+
+ for (const auto &LLVMBB : Fn)
+ for (const auto &Inst : LLVMBB) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+ if (Alloca->isSwiftError())
+ FuncInfo->SwiftErrorVals.push_back(Alloca);
+ }
+}
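+
+// Illustrative IR (editor's example, not part of the imported source): the two
+// sources of swifterror values collected above look like
+//
+//   define swiftcc void @f(%swift.error** swifterror %err)  ; argument form
+//   %e = alloca swifterror %swift.error*                    ; alloca form
+//
+// with @f, %err, %e and %swift.error standing in for real names.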
+
+/// For each basic block, merge incoming swifterror values or simply propagate
+/// them. The merged results will be saved in SwiftErrorMap. For predecessors
+/// that are not yet visited, we create virtual registers to hold the swifterror
+/// values and save them in SwiftErrorWorklist.
+static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const BasicBlock *LLVMBB,
+ SelectionDAGBuilder *SDB) {
+ if (!TLI->supportSwiftError())
+ return;
+
+  // We should only do this when we have a swifterror parameter or a swifterror
+  // alloca.
+ if (FuncInfo->SwiftErrorVals.empty())
+ return;
+
+ // At beginning of a basic block, insert PHI nodes or get the virtual
+ // register from the only predecessor, and update SwiftErrorMap; if one
+ // of the predecessors is not visited, update SwiftErrorWorklist.
+ // At end of a basic block, if a block is in SwiftErrorWorklist, insert copy
+ // to sync up the virtual register assignment.
+
+ // Always create a virtual register for each swifterror value in entry block.
+ auto &DL = SDB->DAG.getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+ }
+ return;
+ }
+
+ if (auto *UniquePred = LLVMBB->getUniquePredecessor()) {
+ auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred];
+ if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) {
+ // Update SwiftErrorWorklist with a new virtual register.
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg);
+ // Propagate the information from the single predecessor.
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+ }
+ return;
+ }
+ // Propagate the information from the single predecessor.
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB] =
+ FuncInfo->SwiftErrorMap[UniquePredMBB];
+ return;
+ }
+
+ // For the case of multiple predecessors, update SwiftErrorWorklist.
+ // Handle the case where we have two or more predecessors being the same.
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ auto *PredMBB = FuncInfo->MBBMap[*PI];
+ if (!FuncInfo->SwiftErrorMap.count(PredMBB) &&
+ !FuncInfo->SwiftErrorWorklist.count(PredMBB)) {
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // When we actually visit the basic block PredMBB, we will materialize
+ // the virtual register assignment in copySwiftErrorsToFinalVRegs.
+ FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg);
+ }
+ }
+ }
+
+ // For the case of multiple predecessors, create a virtual register for
+ // each swifterror value and generate Phi node.
+ for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+
+ MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB,
+ FuncInfo->MBB->begin(), SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::PHI), VReg);
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ auto *PredMBB = FuncInfo->MBBMap[*PI];
+ unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ?
+ FuncInfo->SwiftErrorMap[PredMBB][I] :
+ FuncInfo->SwiftErrorWorklist[PredMBB][I];
+ SwiftErrorPHI.addReg(SwiftErrorReg)
+ .addMBB(PredMBB);
+ }
+ }
+}
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ // Initialize the Fast-ISel state, if needed.
+ FastISel *FastIS = nullptr;
+ if (TM.Options.EnableFastISel)
+ FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+
+ setupSwiftErrorVals(Fn, TLI, FuncInfo);
+
+ // Iterate over all basic blocks in the function.
+ ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+ for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+ const BasicBlock *LLVMBB = *I;
+
+ if (OptLevel != CodeGenOpt::None) {
+ bool AllPredsVisited = true;
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+ PI != PE; ++PI) {
+ if (!FuncInfo->VisitedBBs.count(*PI)) {
+ AllPredsVisited = false;
+ break;
+ }
+ }
+
+ if (AllPredsVisited) {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->ComputePHILiveOutRegInfo(PN);
+ } else {
+ for (BasicBlock::const_iterator I = LLVMBB->begin();
+ const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ FuncInfo->InvalidatePHILiveOutRegInfo(PN);
+ }
+
+ FuncInfo->VisitedBBs.insert(LLVMBB);
+ }
+
+ BasicBlock::const_iterator const Begin =
+ LLVMBB->getFirstNonPHI()->getIterator();
+ BasicBlock::const_iterator const End = LLVMBB->end();
+ BasicBlock::const_iterator BI = End;
+
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ if (!FuncInfo->MBB)
+ continue; // Some blocks like catchpads have no code or MBB.
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+ mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB);
+
+ // Setup an EH landing-pad block.
+ FuncInfo->ExceptionPointerVirtReg = 0;
+ FuncInfo->ExceptionSelectorVirtReg = 0;
+ if (LLVMBB->isEHPad())
+ if (!PrepareEHLandingPad())
+ continue;
+
+ // Before doing SelectionDAG ISel, see if FastISel has been requested.
+ if (FastIS) {
+ FastIS->startNewBlock();
+
+ // Emit code for any incoming arguments. This must happen before
+ // beginning FastISel on the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ ++NumEntryBlocks;
+
+ // Lower any arguments needed in this block if this is the entry block.
+ if (!FastIS->lowerArguments()) {
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
+ if (EnableFastISelAbort > 1)
+ report_fatal_error("FastISel didn't lower all arguments");
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(nullptr);
+ }
+
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
+ // Do FastISel on as many instructions as possible.
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = &*std::prev(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
+ continue;
+ }
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
+ // Try to select the instruction with FastISel.
+ if (FastIS->selectInstruction(Inst)) {
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ // If fast isel succeeded, skip over all the folded instructions, and
+ // then see if there is a load right before the selected instructions.
+ // Try to fold the load if so.
+ const Instruction *BeforeInst = Inst;
+ while (BeforeInst != &*Begin) {
+ BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
+ if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ break;
+ }
+ if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() &&
+ FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
+ // If we succeeded, don't re-select the load.
+ BI = std::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
+ continue;
+ }
+
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
+ // Then handle certain instructions as single-LLVM-Instruction blocks.
+ if (isa<CallInst>(Inst)) {
+
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ dbgs() << "FastISel missed call: ";
+ Inst->dump();
+ }
+ if (EnableFastISelAbort > 2)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
+
+ if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
+ !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
+ if (!R)
+ R = FuncInfo->CreateRegs(Inst->getType());
+ }
+
+ bool HadTailCall = false;
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
+ SelectBasicBlock(Inst->getIterator(), BI, HadTailCall);
+
+ // If the call was emitted as a tail call, we're done with the block.
+ // We also need to delete any previously emitted instructions.
+ if (HadTailCall) {
+ FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
+ --BI;
+ break;
+ }
+
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+ NumFastIselRemaining = RemainingNow;
+ continue;
+ }
+
+ bool ShouldAbort = EnableFastISelAbort;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
+ dbgs() << "FastISel missed terminator: ";
+          // Don't abort for a terminator miss unless the abort level is
+          // really high.
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
+ dbgs() << "FastISel miss: ";
+ }
+ Inst->dump();
+ }
+ if (ShouldAbort)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
+
+ NumFastIselFailures += NumFastIselRemaining;
+ break;
+ }
+
+ FastIS->recomputeInsertPt();
+ } else {
+ // Lower any arguments needed in this block if this is the entry block.
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ ++NumEntryBlocks;
+ LowerArguments(Fn);
+ }
+ }
+ if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
+ bool FunctionBasedInstrumentation =
+ TLI->getSSPStackGuardCheck(*Fn.getParent());
+ SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
+ FunctionBasedInstrumentation);
+ }
+
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
+ if (Begin != BI) {
+ // Run SelectionDAG instruction selection on the remainder of the block
+ // not handled by FastISel. If FastISel is not run, this is the entire
+ // block.
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
+ }
+
+ FinishBasicBlock();
+ FuncInfo->PHINodesToUpdate.clear();
+ }
+
+ delete FastIS;
+ SDB->clearDanglingDebugInfo();
+ SDB->SPDescriptor.resetPerFunctionState();
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
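+///
+/// For example (illustrative only; exact opcodes and register names depend on
+/// the target), a return might be preceded by copies such as
+///   %EAX<def> = COPY %vreg5
+///   RETQ %EAX<imp-use>
+/// where the COPY belongs to the terminator sequence feeding the RETQ.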
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef())
+    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ return MI.isDebugValue();
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() ||
+ (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) &&
+ TargetRegisterInfo::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its stack
+/// protector check's success machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point cannot travel across basic
+/// blocks. Luckily, SelectionDAG always moves physical registers into vregs
+/// when they enter functions and moves them back through a sequence of copies
+/// into the physical registers right before the terminator, creating a
+/// ``Terminator Sequence''. This function searches for the beginning of the
+/// terminator sequence so that we splice off not just the terminator, but
+/// also the copies that move the vregs into the physical registers.
+static MachineBasicBlock::iterator
+FindSplitPointForStackProtector(MachineBasicBlock *BB) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ while (MIIsInTerminatorSequence(*Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
+}
+
+void
+SelectionDAGISel::FinishBasicBlock() {
+ DEBUG(dbgs() << "Total amount of phi nodes to update: "
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ dbgs() << "Node " << i << " : ("
+ << FuncInfo->PHINodesToUpdate[i].first
+ << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+ // Next, now that we know what the last MBB the LLVM BB expanded is, update
+ // PHI nodes in successors.
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+ continue;
+ PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
+ }
+
+ // Handle stack protector.
+ if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ // The target provides a guard check function. There is no need to
+    // generate error handling code or to split the current basic block.
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+    // Add the load and check to the basic block.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt =
+ FindSplitPointForStackProtector(ParentMBB);
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ } else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+    // point. This saves us from having to deal with live-ins and many other
+    // register allocation issues caused by splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint =
+ FindSplitPointForStackProtector(ParentMBB);
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
+ SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ FuncInfo->MBB = ParentMBB;
+ FuncInfo->InsertPt = ParentMBB->end();
+ SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+    // Codegen the failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ FuncInfo->MBB = FailureMBB;
+ FuncInfo->InsertPt = FailureMBB->end();
+ SDB->visitSPDescriptorFailure(SDB->SPDescriptor);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState();
+ }
+
+ // Lower each BitTestBlock.
+ for (auto &BTB : SDB->BitTestCases) {
+ // Lower header first, if it wasn't already lowered
+ if (!BTB.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = BTB.Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitBitTestHeader(BTB, FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = BTB.Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit test header creation has guaranteed that every
+ // case here doesn't go outside the range. In this case, there is no need
+ // to perform the last bit test, as it will always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
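+      // For example (illustrative), if the cases cover the contiguous values
+      // 0-3 and the header's range check has already proved the value lies in
+      // [0, 3], then whenever the second-to-last test fails the last test must
+      // succeed, so the last test can be dropped.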
+
+ MachineBasicBlock *NextMBB;
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j],
+ FuncInfo->MBB);
+
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+      // This is the "default" BB. We have two jumps to it: from the "header"
+      // BB and from the last "case" BB, unless the latter was skipped.
+ if (PHIBB == BTB.Default) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(BTB.Cases.back().ThisBB);
+ }
+ }
+ // One of "cases" BB.
+ for (unsigned j = 0, ej = BTB.Cases.size();
+ j != ej; ++j) {
+ MachineBasicBlock* cBB = BTB.Cases[j].ThisBB;
+ if (cBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB);
+ }
+ }
+ }
+ SDB->BitTestCases.clear();
+
+ // If the JumpTable record is filled in, then we need to emit a jump table.
+ // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or the jump table MBB.
+ for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ // Lower header first, if it wasn't already lowered
+ if (!SDB->JTCases[i].first.Emitted) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+ FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // Emit the code
+ SDB->visitJumpTable(SDB->JTCases[i].second);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Update PHI Nodes
+ for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+ pi != pe; ++pi) {
+ MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first);
+ MachineBasicBlock *PHIBB = PHI->getParent();
+ assert(PHI->isPHI() &&
+ "This is not a machine PHI node that we are updating!");
+ // "default" BB. We can go there only from header BB.
+ if (PHIBB == SDB->JTCases[i].second.Default)
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
+ .addMBB(SDB->JTCases[i].first.HeaderBB);
+ // JT BB. Just iterate over successors here
+ if (FuncInfo->MBB->isSuccessor(PHIBB))
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
+ }
+ }
+ SDB->JTCases.clear();
+
+ // If we generated any switch lowering information, build and codegen any
+ // additional DAGs necessary.
+ for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ // Set the current basic block to the mbb we wish to insert the code into
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+ // Determine the unique successors.
+ SmallVector<MachineBasicBlock *, 2> Succs;
+ Succs.push_back(SDB->SwitchCases[i].TrueBB);
+ if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ CurDAG->setRoot(SDB->getRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+ // Handle any PHI nodes in successors of this chunk, as if we were coming
+ // from the original BB before switch expansion. Note that PHI nodes can
+ // occur multiple times in PHINodesToUpdate. We have to be very careful to
+ // handle them the right number of times.
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator
+ MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end();
+ MBBI != MBBE && MBBI->isPHI(); ++MBBI) {
+ MachineInstrBuilder PHI(*MF, MBBI);
+ // This value for this PHI node is recorded in PHINodesToUpdate.
+ for (unsigned pn = 0; ; ++pn) {
+ assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+ "Didn't find PHI entry!");
+ if (FuncInfo->PHINodesToUpdate[pn].first == PHI) {
+ PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB);
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ SDB->SwitchCases.clear();
+}
+
+/// Create the scheduler. If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+ return ISHeuristic(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
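+///
+/// For example (illustrative), if the .td pattern asks for mask 255 but the
+/// DAG combiner has already narrowed the AND mask to 15 because the upper bits
+/// of X are known to be zero, the pattern should still match.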
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual AND mask allows bits outside the desired mask, this doesn't
+  // match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+ // Otherwise, the DAG Combiner may have proven that the value coming in is
+ // either already zero or is not demanded. Check for known zero input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+ if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// CheckOrMask - The isel is trying to match something like (or X, 255). If
+/// the dag combiner simplified the 255, we still want to match. RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
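+///
+/// For example (illustrative), if the .td pattern asks for (or X, 255) but the
+/// DAG combiner dropped mask bits that are already known to be one in X, the
+/// pattern should still match.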
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+ int64_t DesiredMaskS) const {
+ const APInt &ActualMask = RHS->getAPIntValue();
+ const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+ // If the actual mask exactly matches, success!
+ if (ActualMask == DesiredMask)
+ return true;
+
+  // If the actual OR mask sets bits outside the desired mask, this doesn't
+  // match.
+ if (ActualMask.intersects(~DesiredMask))
+ return false;
+
+  // Otherwise, the DAG Combiner may have proven that the bits missing from the
+  // OR mask are already known to be set. Check for known one input bits.
+ APInt NeededMask = DesiredMask & ~ActualMask;
+
+ APInt KnownZero, KnownOne;
+ CurDAG->computeKnownBits(LHS, KnownZero, KnownOne);
+
+ // If all the missing bits in the or are already known to be set, match!
+ if ((NeededMask & KnownOne) == NeededMask)
+ return true;
+
+ // TODO: check to see if missing bits are just not demanded.
+
+ // Otherwise, this pattern doesn't match.
+ return false;
+}
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen. Others should not call it.
+void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
+ const SDLoc &DL) {
+ std::vector<SDValue> InOps;
+ std::swap(InOps, Ops);
+
+ Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+ Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
+ Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
+
+ unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
+
+ while (i != e) {
+ unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+ if (!InlineAsm::isMemKind(Flags)) {
+ // Just skip over this operand, copying the operands verbatim.
+ Ops.insert(Ops.end(), InOps.begin()+i,
+ InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+ i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+ } else {
+ assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+ "Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
+ // Otherwise, this is a memory operand. Ask the target to select it.
+ std::vector<SDValue> SelOps;
+ unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+ if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps))
+ report_fatal_error("Could not match memory address. Inline asm"
+ " failure!");
+
+ // Add this to the output node.
+ unsigned NewFlags =
+ InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+ NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
+ Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
+ Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ i += 2;
+ }
+ }
+
+ // Add the glue input back if present.
+ if (e != InOps.size())
+ Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
+/// SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+ unsigned FlagResNo = N->getNumValues()-1;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDUse &Use = I.getUse();
+ if (Use.getResNo() == FlagResNo)
+ return Use.getUser();
+ }
+ return nullptr;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+ SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
+ bool IgnoreChains) {
+  // Nodes are given unique IDs, and a node's ID is guaranteed to be greater
+  // than the IDs of all of its (recursive) operands. If we scan to a point
+  // where 'Use' has a smaller ID than the node we're scanning for, then we
+  // know we will never find it.
+  //
+  // The Use's ID may be -1 (unassigned) if it is a newly allocated node. This
+  // can happen because we scan down to newly selected nodes in the case of
+  // glue uses.
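+  //
+  // For example (illustrative), if Def has ID 12 and Use has ID 7, Def cannot
+  // appear among Use's (recursive) operands, so the search can stop here.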
+ if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+ return false;
+
+  // Don't revisit a node if we already scanned it and didn't fail; we know we
+  // won't fail if we scan it again.
+ if (!Visited.insert(Use).second)
+ return false;
+
+ for (const SDValue &Op : Use->op_values()) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Op.getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
+ SDNode *N = Op.getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+ return true;
+ }
+ return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+ SDNode *Root) const {
+ if (OptLevel == CodeGenOpt::None) return false;
+ return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ CodeGenOpt::Level OptLevel,
+ bool IgnoreChains) {
+ if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle. e.g. In the following
+  // diagram, Root can reach N through X. If N is folded into Root, then
+ // X is both a predecessor and a successor of U.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ / //
+ // \ / //
+ // [Root*] //
+ //
+ // * indicates nodes to be folded together.
+ //
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
+ // check if it might reach N.
+ //
+ // [N*] //
+ // ^ ^ //
+ // / \ //
+ // [U*] [X]? //
+ // ^ ^ //
+ // \ \ //
+ // \ | //
+ // [Root*] | //
+ // ^ | //
+ // f | //
+ // | / //
+ // [Y] / //
+ // ^ / //
+ // f / //
+ // | / //
+ // [GU] //
+ //
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
+ // a cycle in the scheduling graph.
+
+ // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+ EVT VT = Root->getValueType(Root->getNumValues()-1);
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (!GU)
+ break;
+ Root = GU;
+ VT = Root->getValueType(Root->getNumValues()-1);
+
+ // If our query node has a glue result with a use, we've walked up it. If
+ // the user (which has already been selected) has a chain or indirectly uses
+ // the chain, our WalkChainUsers predicate will not consider it. Because of
+ // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false;
+ }
+
+
+ SmallPtrSet<SDNode*, 16> Visited;
+ return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+}
+
+void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+ SDLoc DL(N);
+
+ std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+ SelectInlineAsmMemoryOperands(Ops, DL);
+
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops);
+ New->setNodeId(-1);
+ ReplaceUses(N, New.getNode());
+ CurDAG->RemoveDeadNode(N);
+}
+
+void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ unsigned Reg =
+ TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
+ *CurDAG);
+ SDValue New = CurDAG->getCopyFromReg(
+ Op->getOperand(0), dl, Reg, Op->getValueType(0));
+ New->setNodeId(-1);
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
+}
+
+void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Op->getOperand(2).getValueType(),
+ *CurDAG);
+ SDValue New = CurDAG->getCopyToReg(
+ Op->getOperand(0), dl, Reg, Op->getOperand(2));
+ New->setNodeId(-1);
+ ReplaceUses(Op, New.getNode());
+ CurDAG->RemoveDeadNode(Op);
+}
+
+void SelectionDAGISel::Select_UNDEF(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
+}
+
+/// GetVBR - decode a vbr encoding whose top bit is set.
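+/// For example (illustrative), with Val == 0x83 the continuation byte 0x02 is
+/// read from the table and the result is (0x83 & 127) | (0x02 << 7) == 259.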
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+/// When a match is complete, this method updates uses of interior chain results
+/// to use the new results.
+void SelectionDAGISel::UpdateChains(
+ SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // glue results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+ assert(ChainNode->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted node left in chain");
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Glue)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain);
+
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes);
+
+ DEBUG(dbgs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding because
+/// it would induce a cycle) and whether we have a TokenFactor node sandwiched
+/// between pattern nodes (in which case the TF becomes part of the pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(const SDNode *ChainedNode,
+ SmallVectorImpl<SDNode *> &ChainedNodesInPattern,
+ DenseMap<const SDNode *, ChainResult> &TokenFactorResult,
+ SmallVectorImpl<SDNode *> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ unsigned UserOpcode = User->getOpcode();
+ if (User->isMachineOpcode() ||
+ UserOpcode == ISD::CopyToReg ||
+ UserOpcode == ISD::CopyFromReg ||
+ UserOpcode == ISD::INLINEASM ||
+ UserOpcode == ISD::EH_LABEL ||
+ UserOpcode == ISD::LIFETIME_START ||
+ UserOpcode == ISD::LIFETIME_END) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ // If we found a TokenFactor, there are two cases to consider: first if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ auto MemoizeResult = TokenFactorResult.find(User);
+ bool Visited = MemoizeResult != TokenFactorResult.end();
+ // Recursively walk chain users only if the result is not memoized.
+ if (!Visited) {
+ auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult,
+ InteriorChainedNodes);
+ MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first;
+ }
+ switch (MemoizeResult->second) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ if (!Visited) {
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ }
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Used for memoization. Without it WalkChainUsers could take exponential
+ // time to run.
+ DenseMap<const SDNode *, ChainResult> TokenFactorResult;
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ TokenFactorResult,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (const SDValue &Op : N->op_values()) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ Op.getNode()))
+ InputChains.push_back(Op);
+ }
+ }
+
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
+ MVT::Other, InputChains);
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have glue and chains as well.
+ // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though.
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+  // If MorphNodeTo returned a pre-existing equivalent node rather than morphing
+  // Node in place, replace uses of the old node with the new one and delete the
+  // old node.
+ if (Res != Node) {
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+ CurDAG->RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo].first;
+}
+
+/// CheckChildSame - Implements OP_CheckChildXSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
+ RecordedNodes);
+}
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ const SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const TargetLowering *TLI, const DataLayout &DL) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI,
+ DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node. If the current predicate is known to
+/// fail, set Result=true and return anything. If the current predicate is
+/// known to pass or cannot be evaluated here, set Result=false and return
+/// the MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result,
+ const SelectionDAGISel &SDISel,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Same:
+ case SelectionDAGISel::OPC_CheckChild1Same:
+ case SelectionDAGISel::OPC_CheckChild2Same:
+ case SelectionDAGISel::OPC_CheckChild3Same:
+ Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(
+ Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(),
+ Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Integer:
+ case SelectionDAGISel::OPC_CheckChild1Integer:
+ case SelectionDAGISel::OPC_CheckChild2Integer:
+ case SelectionDAGISel::OPC_CheckChild3Integer:
+ case SelectionDAGISel::OPC_CheckChild4Integer:
+ Result = !::CheckChildInteger(Table, Index, N,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+namespace {
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched;
+};
+
+/// \brief A DAG update listener to keep the matching state
+/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
+/// change the DAG while matching. The X86 addressing mode matcher is one
+/// example of this.
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
+ SmallVectorImpl<MatchScope> &MatchScopes;
+public:
+ MatchStateUpdater(SelectionDAG &DAG,
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
+ SmallVectorImpl<MatchScope> &MS) :
+ SelectionDAG::DAGUpdateListener(DAG),
+ RecordedNodes(RN), MatchScopes(MS) { }
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ // Some early-returns here to avoid the search if we deleted the node or
+ // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+ // do, so it's unnecessary to update matching state at that point).
+ // Neither of these can occur currently because we only install this
+    // update listener while matching a complex pattern.
+ if (!E || E->isMachineOpcode())
+ return;
+ // Performing linear search here does not matter because we almost never
+ // run this code. You'd have to have a CSE during complex pattern
+ // matching.
+ for (auto &I : RecordedNodes)
+ if (I.first.getNode() == N)
+ I.first.setNode(E);
+
+ for (auto &I : MatchScopes)
+ for (auto &J : I.NodeStack)
+ if (J.getNode() == N)
+ J.setNode(E);
+ }
+};
+} // end anonymous namespace
+
+void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
+ const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::RegisterMask:
+ case ISD::HANDLENODE:
+ case ISD::MDNODE_SDNODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::MCSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ case ISD::EH_LABEL:
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ CurDAG->RemoveDeadNode(NodeToMatch);
+ return;
+ case ISD::INLINEASM:
+ Select_INLINEASM(NodeToMatch);
+ return;
+ case ISD::READ_REGISTER:
+ Select_READ_REGISTER(NodeToMatch);
+ return;
+ case ISD::WRITE_REGISTER:
+ Select_WRITE_REGISTER(NodeToMatch);
+ return;
+ case ISD::UNDEF:
+ Select_UNDEF(NodeToMatch);
+ return;
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching, if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+ // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and glue for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputGlue;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+ // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+
+ DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ dbgs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+#ifndef NDEBUG
+ unsigned CurrentOpcodeIndex = MatcherIndex;
+#endif
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+ // the first check fail (which then pops it) is inefficient. If we can
+ // determine immediately that the first check (or first several) will
+ // immediately fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate
+ << ", continuing at " << FailIndex << "\n");
+ ++NumDAGIselRetries;
+
+ // Otherwise, we know that this case of the Scope is guaranteed to fail,
+ // move to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputGlue = InputGlue;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode: {
+ // Remember this node, it may end up being an operand in the pattern.
+ SDNode *Parent = nullptr;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
+ continue;
+ }
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveChild0: case OPC_MoveChild1:
+ case OPC_MoveChild2: case OPC_MoveChild3:
+ case OPC_MoveChild4: case OPC_MoveChild5:
+ case OPC_MoveChild6: case OPC_MoveChild7: {
+ unsigned ChildNo = Opcode-OPC_MoveChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+
+ case OPC_CheckChild0Same: case OPC_CheckChild1Same:
+ case OPC_CheckChild2Same: case OPC_CheckChild3Same:
+ if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
+ Opcode-OPC_CheckChild0Same))
+ break;
+ continue;
+
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat: {
+ unsigned CPNum = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+ // If target can modify DAG during matching, keep the matching state
+ // consistent.
+ std::unique_ptr<MatchStateUpdater> MSU;
+ if (ComplexPatternFuncMutatesDAG())
+ MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+ MatchScopes));
+
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
+ RecordedNodes))
+ break;
+ continue;
+ }
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == Opc)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT CurNodeVT = N.getSimpleValueType();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout(),
+ Opcode - OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
+ case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
+ case OPC_CheckChild4Integer:
+ if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
+ Opcode-OPC_CheckChild0Integer)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, OptLevel,
+ true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch),
+ VT), nullptr));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
+ continue;
+ }
+ case OPC_EmitRegister2: {
+ // For targets w/ more than 256 register names, the register enum
+ // values are stored in two bytes in the matcher table (just like
+ // opcodes).
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RegNo |= MatcherTable[MatcherIndex++] << 8;
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), nullptr));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+ SDValue Imm = RecordedNodes[RecNo].first;
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue();
+ Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch),
+ Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1
+ case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(!InputChain.getNode() &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0;
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (!InputChain.getNode())
+ break; // Failed to merge.
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(!InputChain.getNode() &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].first.hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (!InputChain.getNode())
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (!InputChain.getNode())
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch),
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
+ continue;
+ }
+
+ case OPC_EmitNode: case OPC_MorphNodeTo:
+ case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
+ case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs;
+ // If this is one of the compressed forms, get the number of VTs based
+ // on the Opcode. Otherwise read the next byte from the table.
+ if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2)
+ NumVTs = Opcode - OPC_MorphNodeTo0;
+ else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2)
+ NumVTs = Opcode - OPC_EmitNode0;
+ else
+ NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR)
+ VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs);
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo].first);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential glue
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Glue) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/glue inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
+ Ops.push_back(InputGlue);
+
+ // Create the node.
+ SDNode *Res = nullptr;
+ bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
+ (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2);
+ if (!IsMorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch),
+ VTList, Ops);
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ nullptr));
+ }
+
+ } else {
+ assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
+ "NodeToMatch was removed partway through selection");
+ SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
+ SDNode *E) {
+ auto &Chain = ChainNodesMatched;
+ assert((!E || llvm::find(Chain, N) == Chain.end()) &&
+ "Chain node replaced during MorphNode");
+ Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
+ });
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo);
+ }
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs glue is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+ // FIXME: This is vastly incorrect for patterns with multiple-output
+ // instructions that access memory and for ComplexPatterns that match
+ // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ // Only attach load or store memory operands if the generated
+ // instruction may load or store.
+ const MCInstrDesc &MCID = TII->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
+
+ unsigned NumMemRefs = 0;
+ for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ ++NumMemRefs;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ ++NumMemRefs;
+ } else {
+ ++NumMemRefs;
+ }
+ }
+
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(NumMemRefs);
+
+ MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
+ for (SmallVectorImpl<MachineMemOperand *>::const_iterator I =
+ MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+ if ((*I)->isLoad()) {
+ if (mayLoad)
+ *MemRefsPos++ = *I;
+ } else if ((*I)->isStore()) {
+ if (mayStore)
+ *MemRefsPos++ = *I;
+ } else {
+ *MemRefsPos++ = *I;
+ }
+ }
+
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
+ }
+
+ DEBUG(dbgs() << " "
+ << (IsMorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); dbgs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (IsMorphNodeTo) {
+ // Update chain uses.
+ UpdateChains(Res, InputChain, ChainNodesMatched, true);
+ return;
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+ // The match has been completed, and new nodes (if any) have been
+ // created. Patch up references to the matched dag to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+ assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+ SDValue Res = RecordedNodes[ResSlot].first;
+
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
+ "Invalid number of results to complete!");
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+ // Update chain uses.
+ UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false);
+
+ // If the root node defines glue, we need to update it to the glue result.
+ // TODO: This never happens in our tests and I think it can be removed /
+ // replaced with an assert, but if we do it this way the change is
+ // NFC.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
+ MVT::Glue &&
+ InputGlue.getNode())
+ CurDAG->ReplaceAllUsesOfValueWith(
+ SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+ CurDAG->RemoveDeadNode(NodeToMatch);
+
+ return;
+ }
+ }
+
+ // If the code reached this point, then the match failed. See if there is
+ // another child to try in the current 'Scope', otherwise pop it until we
+ // find a case to check.
+ DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
+
+ InputChain = LastScope.InputChain;
+ InputGlue = LastScope.InputGlue;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+
+ // Check to see what the offset is at the new MatcherIndex. If it is zero
+ // we have reached the end of this scope, otherwise we have another child
+ // in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Cannot select: ";
+
+ if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+ N->getOpcode() != ISD::INTRINSIC_VOID) {
+ N->printrFull(Msg, CurDAG);
+ Msg << "\nIn function: " << MF->getName();
+ } else {
+ bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+ unsigned iid =
+ cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+ if (iid < Intrinsic::num_intrinsics)
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+ Msg << "target intrinsic %" << TII->getName(iid);
+ else
+ Msg << "unknown intrinsic #" << iid;
+ }
+ report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 000000000000..2764688518c2
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,307 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "dag-printer"
+
+namespace llvm {
+ template<>
+ struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static bool hasEdgeDestLabels() {
+ return true;
+ }
+
+ static unsigned numEdgeDestLabels(const void *Node) {
+ return ((const SDNode *) Node)->getNumValues();
+ }
+
+ static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ return ((const SDNode *) Node)->getValueType(i).getEVTString();
+ }
+
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((const SDNode *) Node));
+ }
+
+ /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+ /// should actually target another edge source, not a node. If this method
+ /// is implemented, getEdgeTarget should be implemented.
+ template<typename EdgeIter>
+ static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ return true;
+ }
+
+ /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+ /// called to determine which outgoing edge of Node is the target of this
+ /// edge.
+ template<typename EdgeIter>
+ static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ SDNode *TargetNode = *I;
+ SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+ std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+ return NI;
+ }
+
+ static std::string getGraphName(const SelectionDAG *G) {
+ return G->getMachineFunction().getName();
+ }
+
+ static bool renderGraphFromBottomUp() {
+ return true;
+ }
+
+ static std::string getNodeIdentifierLabel(const SDNode *Node,
+ const SelectionDAG *Graph) {
+ std::string R;
+ raw_string_ostream OS(R);
+#ifndef NDEBUG
+ OS << 't' << Node->PersistentId;
+#else
+ OS << static_cast<const void *>(Node);
+#endif
+ return R;
+ }
+
+ /// If you want to override the dot attributes printed for a particular
+ /// edge, override this method.
+ template<typename EdgeIter>
+ static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+ const SelectionDAG *Graph) {
+ SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+ EVT VT = Op.getValueType();
+ if (VT == MVT::Glue)
+ return "color=red,style=bold";
+ else if (VT == MVT::Other)
+ return "color=blue,style=dashed";
+ return "";
+ }
+
+
+ static std::string getSimpleNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ std::string Result = Node->getOperationName(G);
+ {
+ raw_string_ostream OS(Result);
+ Node->print_details(OS, G);
+ }
+ return Result;
+ }
+ std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+ static std::string getNodeAttributes(const SDNode *N,
+ const SelectionDAG *Graph) {
+#ifndef NDEBUG
+ const std::string &Attrs = Graph->getGraphAttrs(N);
+ if (!Attrs.empty()) {
+ if (Attrs.find("shape=") == std::string::npos)
+ return std::string("shape=Mrecord,") + Attrs;
+ else
+ return Attrs;
+ }
+#endif
+ return "shape=Mrecord";
+ }
+
+ static void addCustomGraphFeatures(SelectionDAG *G,
+ GraphWriter<SelectionDAG*> &GW) {
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
+ if (G->getRoot().getNode())
+ GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ "color=blue,style=dashed");
+ }
+ };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+ const SelectionDAG *G) {
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+ ViewGraph(this, "dag." + getMachineFunction().getName(),
+ false, Title);
+#else
+ errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
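+// For example, while stopped somewhere in SelectionDAGISel code one might
+// do something like "call CurDAG->viewGraph()" from gdb (assuming a debug
+// build on a system with Graphviz or gv installed).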
+void SelectionDAG::viewGraph() {
+ viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (e.g. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+ NodeGraphAttrs.clear();
+#else
+ errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (e.g. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = Attrs;
+#else
+ errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (e.g. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+ std::map<const SDNode *, std::string>::const_iterator I =
+ NodeGraphAttrs.find(N);
+
+ if (I != NodeGraphAttrs.end())
+ return I->second;
+ else
+ return "";
+#else
+ errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+ return std::string();
+#endif
+}
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+ errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor. Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited,
+ int level, bool &printed) {
+ bool hit_limit = false;
+
+#ifndef NDEBUG
+ if (level >= 20) {
+ if (!printed) {
+ printed = true;
+ DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+ }
+ return true;
+ }
+
+ unsigned oldSize = visited.size();
+ visited.insert(N);
+ if (visited.size() != oldSize) {
+ setGraphColor(N, Color);
+ for (SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N);
+ i != iend;
+ ++i) {
+ hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit;
+ }
+ }
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+ return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+ DenseSet<SDNode *> visited;
+ bool printed = false;
+ if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+ // Visually mark that we hit the limit
+ if (strcmp(Color, "red") == 0) {
+ setSubgraphColorHelper(N, "blue", visited, 0, printed);
+ } else if (strcmp(Color, "yellow") == 0) {
+ setSubgraphColorHelper(N, "green", visited, 0, printed);
+ }
+ }
+
+#else
+ errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+ << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+ std::string s;
+ raw_string_ostream O(s);
+ O << "SU(" << SU->NodeNum << "): ";
+ if (SU->getNode()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ O << DOTGraphTraits<SelectionDAG*>
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
+ O << "\n ";
+ }
+ } else {
+ O << "CROSS RC COPY";
+ }
+ return O.str();
+}
+
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+ if (DAG) {
+ // Draw a special "GraphRoot" node to indicate the root of the graph.
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
+ const SDNode *N = DAG->getRoot().getNode();
+ if (N && N->getNodeId() != -1)
+ GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1,
+ "color=blue,style=dashed");
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
new file mode 100644
index 000000000000..55f70f7d9fd3
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -0,0 +1,17 @@
+//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGTargetInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+using namespace llvm;
+
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 000000000000..90aaba247265
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,958 @@
+//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+STATISTIC(NumSlotsAllocatedForStatepoints,
+ "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+ "Maximum number of stack slots required for a singe statepoint");
+
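+/// Append a literal constant to a statepoint/stackmap operand list. For
+/// example, pushStackMapConstant(Ops, Builder, 42) appends two i64 target
+/// constants, StackMaps::ConstantOp followed by 42, which the StackMap
+/// machinery later treats as a single literal constant location.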
+static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
+ SelectionDAGBuilder &Builder, uint64_t Value) {
+ SDLoc L = Builder.getCurSDLoc();
+ Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L,
+ MVT::i64));
+ Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64));
+}
+
+void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+ // Consistency check
+ assert(PendingGCRelocateCalls.empty() &&
+ "Trying to visit statepoint before finished processing previous one");
+ Locations.clear();
+ NextSlotToAllocate = 0;
+ // Need to resize this on each safepoint - we need the two to stay in sync and
+ // the clear patterns of a SelectionDAGBuilder have no relation to
+ // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false.
+ AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+}
+
+void StatepointLoweringState::clear() {
+ Locations.clear();
+ AllocatedStackSlots.clear();
+ assert(PendingGCRelocateCalls.empty() &&
+ "cleared before statepoint sequence completed");
+}
+
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+ SelectionDAGBuilder &Builder) {
+ NumSlotsAllocatedForStatepoints++;
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+
+ unsigned SpillSize = ValueType.getSizeInBits() / 8;
+ assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
+
+ // First look for a previously created stack slot which is not in
+ // use (accounting for the fact that arbitrary slots may already be
+ // reserved); failing that, create a new stack slot and use it.
+
+ const size_t NumSlots = AllocatedStackSlots.size();
+ assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
+
+ // The stack slots in StatepointStackSlots beyond the first NumSlots were
+ // added in this instance of StatepointLoweringState, and cannot be re-used.
+ assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
+
+ for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
+ if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
+ const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+ if (MFI->getObjectSize(FI) == SpillSize) {
+ AllocatedStackSlots.set(NextSlotToAllocate);
+ return Builder.DAG.getFrameIndex(FI, ValueType);
+ }
+ }
+ }
+
+ // Couldn't find a free slot, so create a new one:
+
+ SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+ const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MFI->markAsStatepointSpillSlotObjectIndex(FI);
+
+ Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+
+ StatepointMaxSlotsRequired = std::max<unsigned long>(
+ StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size());
+
+ return SpillSlot;
+}
+
+/// Utility function for reservePreviousStackSlotForValue. Tries to find
+/// a stack slot index to which we have spilled a value for previous
+/// statepoints. LookUpDepth specifies the maximum DFS depth this function is
+/// allowed to search.
+static Optional<int> findPreviousSpillSlot(const Value *Val,
+ SelectionDAGBuilder &Builder,
+ int LookUpDepth) {
+ // Can not look any further - give up now
+ if (LookUpDepth <= 0)
+ return None;
+
+ // Spill location is known for gc relocates
+ if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
+ const auto &SpillMap =
+ Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()];
+
+ auto It = SpillMap.find(Relocate->getDerivedPtr());
+ if (It == SpillMap.end())
+ return None;
+
+ return It->second;
+ }
+
+ // Look through bitcast instructions.
+ if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val))
+ return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
+
+ // Look through phi nodes
+ // All incoming values should have the same known stack slot, otherwise the
+ // result is unknown.
+ if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
+ Optional<int> MergedResult = None;
+
+ for (auto &IncomingValue : Phi->incoming_values()) {
+ Optional<int> SpillSlot =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
+ if (!SpillSlot.hasValue())
+ return None;
+
+ if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+ return None;
+
+ MergedResult = SpillSlot;
+ }
+ return MergedResult;
+ }
+
+ // TODO: We can do better for PHI nodes. In cases like this:
+ // ptr = phi(relocated_pointer, not_relocated_pointer)
+ // statepoint(ptr)
+ // We will report that the stack slot for ptr is unknown, and later we might
+ // assign different stack slots for ptr and relocated_pointer. This limits
+ // llvm's ability to remove redundant stores.
+ // Unfortunately this is hard to accomplish with the current infrastructure:
+ // we use this function to eliminate the spill store completely, while in the
+ // example above we would still need to emit a store, just to a special
+ // "preferred" location instead of an arbitrary one.
+
+ // TODO: handle simple updates. If a value is modified and the original
+ // value is no longer live, it would be nice to put the modified value in the
+ // same slot. This allows folding of the memory accesses for some
+ // instruction types (like an increment).
+ // statepoint (i)
+ // i1 = i+1
+ // statepoint (i1)
+ // However we need to be careful for cases like this:
+ // statepoint(i)
+ // i1 = i+1
+ // statepoint(i, i1)
+ // Here we want to reserve a spill slot for 'i', but not for 'i+1'. If we
+ // just put handling of simple modifications in this function, as is done
+ // for bitcasts, we might end up reserving i's slot for 'i+1' because the
+ // order in which we visit values is unspecified.
+
+ // Don't know any information about this instruction
+ return None;
+}
+
+/// Try to find existing copies of the incoming values in stack slots used for
+/// statepoint spilling. If we can find a spill slot for the incoming value,
+/// mark that slot as allocated, and reuse the same slot for this safepoint.
+/// This helps to avoid a series of loads and stores that only serve to
+/// reshuffle values on the stack between calls.
+static void reservePreviousStackSlotForValue(const Value *IncomingValue,
+ SelectionDAGBuilder &Builder) {
+
+ SDValue Incoming = Builder.getValue(IncomingValue);
+
+ if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
+ // We won't need to spill this, so no need to check for previously
+ // allocated stack slots
+ return;
+ }
+
+ SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
+ if (OldLocation.getNode())
+ // Duplicates in input
+ return;
+
+ const int LookUpDepth = 6;
+ Optional<int> Index =
+ findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
+ if (!Index.hasValue())
+ return;
+
+ const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
+
+ auto SlotIt = find(StatepointSlots, *Index);
+ assert(SlotIt != StatepointSlots.end() &&
+ "Value spilled to the unknown stack slot");
+
+ // This is one of our dedicated lowering slots
+ const int Offset = std::distance(StatepointSlots.begin(), SlotIt);
+ if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+ // stack slot already assigned to someone else, can't use it!
+ // TODO: currently we reserve space for gc arguments after doing
+ // normal allocation for deopt arguments. We should reserve for
+ // _all_ deopt and gc arguments, then start allocating. This
+ // will prevent some moves being inserted when vm state changes,
+ // but gc state doesn't between two calls.
+ return;
+ }
+ // Reserve this stack slot
+ Builder.StatepointLowering.reserveStackSlot(Offset);
+
+ // Cache this slot so we find it when going through the normal
+ // assignment loop.
+ SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType());
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+}
+
+/// Remove any duplicates (as SDValues) from the derived pointer pairs. This
+/// is not required for correctness. Its purpose is to reduce the size of
+/// the StackMap section. It has no effect on the number of spill slots
+/// required or the actual lowering.
+static void
+removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
+ SmallVectorImpl<const Value *> &Ptrs,
+ SmallVectorImpl<const GCRelocateInst *> &Relocs,
+ SelectionDAGBuilder &Builder,
+ FunctionLoweringInfo::StatepointSpillMap &SSM) {
+ DenseMap<SDValue, const Value *> Seen;
+
+ SmallVector<const Value *, 64> NewBases, NewPtrs;
+ SmallVector<const GCRelocateInst *, 64> NewRelocs;
+ for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
+ SDValue SD = Builder.getValue(Ptrs[i]);
+ auto SeenIt = Seen.find(SD);
+
+ if (SeenIt == Seen.end()) {
+ // Only add non-duplicates
+ NewBases.push_back(Bases[i]);
+ NewPtrs.push_back(Ptrs[i]);
+ NewRelocs.push_back(Relocs[i]);
+ Seen[SD] = Ptrs[i];
+ } else {
+ // Duplicate pointer found, note in SSM and move on:
+ SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
+ }
+ }
+ assert(Bases.size() >= NewBases.size());
+ assert(Ptrs.size() >= NewPtrs.size());
+ assert(Relocs.size() >= NewRelocs.size());
+ Bases = NewBases;
+ Ptrs = NewPtrs;
+ Relocs = NewRelocs;
+ assert(Ptrs.size() == Bases.size());
+ assert(Ptrs.size() == Relocs.size());
+}
+
+/// Extract the call from the statepoint, lower it and return a pointer to
+/// the call node. Also update NodeMap so that getValue(statepoint) will
+/// reference the lowered call result.
+static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
+
+ SDValue ReturnValue, CallEndVal;
+ std::tie(ReturnValue, CallEndVal) =
+ Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
+ SDNode *CallEnd = CallEndVal.getNode();
+
+ // Get a call instruction from the call sequence chain. Tail calls are not
+ // allowed. The following code is essentially reverse engineering X86's
+ // LowerCallTo.
+ //
+ // We are expecting DAG to have the following form:
+ //
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // get_return_value ch, glue
+ //
+ // get_return_value can either be a sequence of CopyFromReg instructions
+ // to grab the return value from the return register(s), or it can be a LOAD
+ // to load a value returned by reference via a stack slot.
+
+ bool HasDef = !SI.CLI.RetTy->isVoidTy();
+ if (HasDef) {
+ if (CallEnd->getOpcode() == ISD::LOAD)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ else
+ while (CallEnd->getOpcode() == ISD::CopyFromReg)
+ CallEnd = CallEnd->getOperand(0).getNode();
+ }
+
+ assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
+ return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
+}
+
+/// Spill a value incoming to the statepoint. It might be either part of the
+/// vmstate or the gcstate; in both cases unconditionally spill it on the
+/// stack unless it is a null constant. Return a pair whose first element is
+/// the frame index containing the saved value and whose second element is
+/// the outgoing chain from the emitted store.
+static std::pair<SDValue, SDValue>
+spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
+ SelectionDAGBuilder &Builder) {
+ SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+
+ // Emit new store if we didn't do it for this ptr before
+ if (!Loc.getNode()) {
+ Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
+ Builder);
+ int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
+ // We use TargetFrameIndex so that isel will not select it into LEA
+ Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+
+ // TODO: We could create a TokenFactor node instead of chaining stores one
+ // after another; this may allow a bit more optimal scheduling for them.
+
+#ifndef NDEBUG
+ // Right now we always allocate spill slots that are of the same
+ // size as the value we're about to spill (the size of spillee can
+ // vary since we spill vectors of pointers too). At some point we
+ // can consider allowing spills of smaller values to larger slots
+ // (i.e. change the '==' in the assert below to a '>=').
+ auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ assert((MFI->getObjectSize(Index) * 8) ==
+ Incoming.getValueType().getSizeInBits() &&
+ "Bad spill: stack slot does not match!");
+#endif
+
+ Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
+ MachinePointerInfo::getFixedStack(
+ Builder.DAG.getMachineFunction(), Index));
+
+ Builder.StatepointLowering.setLocation(Incoming, Loc);
+ }
+
+ assert(Loc.getNode());
+ return std::make_pair(Loc, Chain);
+}
+
+/// Lower a single value incoming to a statepoint node. This value can be
+/// either a deopt value or a gc value; the handling is the same. We special
+/// case constants and allocas, then fall back to spilling if required.
+static void lowerIncomingStatepointValue(SDValue Incoming,
+ SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ SDValue Chain = Builder.getRoot();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+ // If the original value was a constant, make sure it gets recorded as
+ // such in the stackmap. This is required so that the consumer can
+ // parse any internal format to the deopt state. It also handles null
+ // pointers and other constant pointers in GC states. Note that constant
+ // vectors do not appear to actually hit this path and that anything larger
+ // than an i64 value (not type!) will fail asserts here.
+ pushStackMapConstant(Ops, Builder, C->getSExtValue());
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint (this is only
+ // really meaningful for a deopt value. For GC, we'd be trying to
+ // relocate the address of the alloca itself?)
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Incoming.getValueType()));
+ } else {
+ // Otherwise, locate a spill slot and explicitly spill it so it
+ // can be found by the runtime later. We currently do not support
+ // tracking values through callee saved registers to their eventual
+ // spill location. This would be a useful optimization, but would
+ // need to be optional since it requires a lot of complexity on the
+ // runtime side which not all would support.
+ auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
+ Ops.push_back(Res.first);
+ Chain = Res.second;
+ }
+
+ Builder.DAG.setRoot(Chain);
+}
+
+/// Lower deopt state and gc pointer arguments of the statepoint. The actual
+/// lowering is described in lowerIncomingStatepointValue. This function is
+/// responsible for lowering everything in the right position and playing some
+/// tricks to avoid redundant stack manipulation where possible. On
+/// completion, 'Ops' will contain ready to use operands for machine code
+/// statepoint. The chain nodes will have already been created and the DAG root
+/// will be set to the last value spilled (if any were).
+static void
+lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SelectionDAGBuilder &Builder) {
+ // Lower the deopt and gc arguments for this statepoint. Layout will be:
+ // deopt argument length, deopt arguments.., gc arguments...
+#ifndef NDEBUG
+ if (auto *GFI = Builder.GFI) {
+ // Check that each of the gc pointers and bases we've gotten out of the
+ // safepoint is something the strategy thinks might be a pointer (or vector
+ // of pointers) into the GC heap. This is basically just here to help catch
+ // errors during statepoint insertion. TODO: This should actually be in the
+ // Verifier, but we can't get to the GCStrategy from there (yet).
+ GCStrategy &S = GFI->getStrategy();
+ for (const Value *V : SI.Bases) {
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed base pointer found in statepoint");
+ }
+ }
+ for (const Value *V : SI.Ptrs) {
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
+ if (Opt.hasValue()) {
+ assert(Opt.getValue() &&
+ "non gc managed derived pointer found in statepoint");
+ }
+ }
+ } else {
+ assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
+ assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
+ }
+#endif
+
+ // Before we actually start lowering (and allocating spill slots for values),
+ // reserve any stack slots which we judge to be profitable to reuse for a
+ // particular value. This is purely an optimization over the code below and
+ // doesn't change semantics at all. It is important for performance that we
+ // reserve slots for both deopt and gc values before lowering either.
+ for (const Value *V : SI.DeoptState) {
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+ for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+ reservePreviousStackSlotForValue(SI.Bases[i], Builder);
+ reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+ }
+
+ // First, prefix the list with the number of unique values to be
+ // lowered. Note that this is the number of *Values* not the
+ // number of SDValues required to lower them.
+ const int NumVMSArgs = SI.DeoptState.size();
+ pushStackMapConstant(Ops, Builder, NumVMSArgs);
+
+ // The vm state arguments are lowered in an opaque manner. We do not know
+ // what type of values are contained within.
+ for (const Value *V : SI.DeoptState) {
+ SDValue Incoming = Builder.getValue(V);
+ lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ }
+
+ // Finally, go ahead and lower all the gc arguments. There's no prefixed
+ // length for this one. After lowering, we'll have the base and pointer
+ // arrays interwoven with each (lowered) base pointer immediately followed by
+ // its (lowered) derived pointer, i.e.
+ // (base[0], ptr[0], base[1], ptr[1], ...)
+ for (unsigned i = 0; i < SI.Bases.size(); ++i) {
+ const Value *Base = SI.Bases[i];
+ lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+
+ const Value *Ptr = SI.Ptrs[i];
+ lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+ }
+
+ // If there are any explicit spill slots passed to the statepoint, record
+ // them, but otherwise do not do anything special. These are user provided
+ // allocas and give control over placement to the consumer. In this case,
+ // it is the contents of the slot which may get updated, not the pointer to
+ // the alloca
+ for (Value *V : SI.GCArgs) {
+ SDValue Incoming = Builder.getValue(V);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+ // This handles allocas as arguments to the statepoint
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+ Incoming.getValueType()));
+ }
+ }
+
+ // Record computed locations for all lowered values.
+ // This cannot be embedded in the lowering loops as we need to record *all*
+ // values, while the previous loops account only for values with unique
+ // SDValues.
+ const Instruction *StatepointInstr = SI.StatepointInstr;
+ auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
+
+ for (const GCRelocateInst *Relocate : SI.GCRelocates) {
+ const Value *V = Relocate->getDerivedPtr();
+ SDValue SDV = Builder.getValue(V);
+ SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
+
+ if (Loc.getNode()) {
+ SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+ } else {
+ // Record the value as visited, but not spilled. This is the case for
+ // allocas and constants. For these values we can avoid emitting a spill
+ // load while visiting the corresponding gc_relocate.
+ // Actually we do not need to record them in this map at all;
+ // we do this only to check that we are not relocating any unvisited
+ // value.
+ SpillMap.SlotMap[V] = None;
+
+ // The default llvm mechanisms for exporting values which are used in
+ // different basic blocks do not work for gc relocates.
+ // Note that it would be incorrect to teach llvm that all relocates are
+ // uses of the corresponding values so that it would automatically
+ // export them. Relocates of spilled values do not use the original
+ // value.
+ if (Relocate->getParent() != StatepointInstr->getParent())
+ Builder.ExportFromCurrentBlock(V);
+ }
+ }
+}
+
+SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
+ SelectionDAGBuilder::StatepointLoweringInfo &SI) {
+ // The basic scheme here is that information about both the original call and
+ // the safepoint is encoded in the CallInst. We create a temporary call and
+ // lower it, then reverse engineer the calling sequence.
+
+ NumOfStatepoints++;
+ // Clear state
+ StatepointLowering.startNewStatepoint(*this);
+
+#ifndef NDEBUG
+ // We schedule gc relocates before removeDuplicateGCPtrs since we _will_
+ // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
+ for (auto *Reloc : SI.GCRelocates)
+ if (Reloc->getParent() == SI.StatepointInstr->getParent())
+ StatepointLowering.scheduleRelocCall(*Reloc);
+#endif
+
+ // Remove any redundant llvm::Values which map to the same SDValue as another
+ // input. Also has the effect of removing duplicates in the original
+ // llvm::Value input list as well. This is a useful optimization for
+ // reducing the size of the StackMap section. It has no other impact.
+ removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
+ FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
+ assert(SI.Bases.size() == SI.Ptrs.size() &&
+ SI.Ptrs.size() == SI.GCRelocates.size());
+
+ // Lower statepoint vmstate and gcstate arguments
+ SmallVector<SDValue, 10> LoweredMetaArgs;
+ lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this);
+
+ // Now that we've emitted the spills, we need to update the root so that the
+ // call sequence is ordered correctly.
+ SI.CLI.setChain(getRoot());
+
+ // Get call node, we will replace it later with statepoint
+ SDValue ReturnVal;
+ SDNode *CallNode;
+ std::tie(ReturnVal, CallNode) =
+ lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports);
+
+ // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END
+ // nodes with all the appropriate arguments and return values.
+
+ // Call Node: Chain, Target, {Args}, RegMask, [Glue]
+ SDValue Chain = CallNode->getOperand(0);
+
+ SDValue Glue;
+ bool CallHasIncomingGlue = CallNode->getGluedNode();
+ if (CallHasIncomingGlue) {
+ // Glue is always last operand
+ Glue = CallNode->getOperand(CallNode->getNumOperands() - 1);
+ }
+
+ // Build the GC_TRANSITION_START node if necessary.
+ //
+ // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the
+ // order in which they appear in the call to the statepoint intrinsic. If
+ // any of the operands is pointer-typed, that operand is immediately
+ // followed by a SRCVALUE for the pointer that may be used during lowering
+ // (e.g. to form MachinePointerInfo values for loads/stores).
+ const bool IsGCTransition =
+ (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) ==
+ (uint64_t)StatepointFlags::GCTransition;
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TSOps;
+
+ // Add chain
+ TSOps.push_back(Chain);
+
+ // Add GC transition arguments
+ for (const Value *V : SI.GCTransitionArgs) {
+ TSOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TSOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue if necessary
+ if (CallHasIncomingGlue)
+ TSOps.push_back(Glue);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDValue GCTransitionStart =
+ DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps);
+
+ Chain = GCTransitionStart.getValue(0);
+ Glue = GCTransitionStart.getValue(1);
+ }
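+  // Note that Chain and Glue now carry the outputs of GC_TRANSITION_START,
+  // so the STATEPOINT node built below is ordered after the transition start.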
+
+ // TODO: Currently, all of these operands are being marked as read/write in
+  // PrologEpilogInserter.cpp; we should special case the VMState arguments
+  // and flags to be read-only.
+ SmallVector<SDValue, 40> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64));
+ Ops.push_back(
+ DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32));
+
+ // Calculate and push starting position of vmstate arguments
+ // Get number of arguments incoming directly into call node
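+  // (the call node is laid out as Chain, Target, {Args}, RegMask, [Glue], so
+  // the non-argument operands account for the 3, or 4 with incoming glue)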
+ unsigned NumCallRegArgs =
+ CallNode->getNumOperands() - (CallHasIncomingGlue ? 4 : 3);
+ Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32));
+
+ // Add call target
+ SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0);
+ Ops.push_back(CallTarget);
+
+ // Add call arguments
+ // Get position of register mask in the call
+ SDNode::op_iterator RegMaskIt;
+ if (CallHasIncomingGlue)
+ RegMaskIt = CallNode->op_end() - 2;
+ else
+ RegMaskIt = CallNode->op_end() - 1;
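+  // Copy only the actual call arguments: skip the chain (operand 0) and the
+  // call target (operand 1), stopping just before the register mask.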
+ Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt);
+
+ // Add a constant argument for the calling convention
+ pushStackMapConstant(Ops, *this, SI.CLI.CallConv);
+
+ // Add a constant argument for the flags
+ uint64_t Flags = SI.StatepointFlags;
+ assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) &&
+ "Unknown flag used");
+ pushStackMapConstant(Ops, *this, Flags);
+
+ // Insert all vmstate and gcstate arguments
+ Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());
+
+ // Add register mask from call node
+ Ops.push_back(*RegMaskIt);
+
+ // Add chain
+ Ops.push_back(Chain);
+
+ // Same for the glue, but we add it only if original call had it
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
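+  // At this point Ops holds, in order: <id>, <numBytes>, <numCallArgs>, the
+  // call target, the call arguments, the calling-convention and flags
+  // constants, the vmstate and gcstate arguments, the register mask, the
+  // chain and (optionally) the glue.
+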
+ // Compute return values. Provide a glue output since we consume one as
+ // input. This allows someone else to chain off us as needed.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+ SDNode *StatepointMCNode =
+ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+
+ SDNode *SinkNode = StatepointMCNode;
+
+ // Build the GC_TRANSITION_END node if necessary.
+ //
+ // See the comment above regarding GC_TRANSITION_START for the layout of
+ // the operands to the GC_TRANSITION_END node.
+ if (IsGCTransition) {
+ SmallVector<SDValue, 8> TEOps;
+
+ // Add chain
+ TEOps.push_back(SDValue(StatepointMCNode, 0));
+
+ // Add GC transition arguments
+ for (const Value *V : SI.GCTransitionArgs) {
+ TEOps.push_back(getValue(V));
+ if (V->getType()->isPointerTy())
+ TEOps.push_back(DAG.getSrcValue(V));
+ }
+
+ // Add glue
+ TEOps.push_back(SDValue(StatepointMCNode, 1));
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+    SDValue GCTransitionEnd =
+        DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps);
+
+    SinkNode = GCTransitionEnd.getNode();
+ }
+
+ // Replace original call
+ DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
+ // Remove original call node
+ DAG.DeleteNode(CallNode);
+
+ // DON'T set the root - under the assumption that it's already set past the
+ // inserted node we created.
+
+ // TODO: A better future implementation would be to emit a single variable
+ // argument, variable return value STATEPOINT node here and then hookup the
+ // return value of each gc.relocate to the respective output of the
+ // previously emitted STATEPOINT value. Unfortunately, this doesn't appear
+ // to actually be possible today.
+
+ return ReturnVal;
+}
+
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+ const BasicBlock *EHPadBB /*= nullptr*/) {
+ assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg &&
+ "anyregcc is not supported on statepoints!");
+
+#ifndef NDEBUG
+ // If this is a malformed statepoint, report it early to simplify debugging.
+ // This should catch any IR level mistake that's made when constructing or
+ // transforming statepoints.
+ ISP.verify();
+
+ // Check that the associated GCStrategy expects to encounter statepoints.
+ assert(GFI->getStrategy().useStatepoints() &&
+ "GCStrategy does not expect to encounter statepoints");
+#endif
+
+ SDValue ActualCallee;
+
+ if (ISP.getNumPatchBytes() > 0) {
+ // If we've been asked to emit a nop sequence instead of a call instruction
+ // for this statepoint then don't lower the call target, but use a constant
+ // `null` instead. Not lowering the call target lets statepoint clients get
+ // away without providing a physical address for the symbolic call target at
+ // link time.
+
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ const auto &DL = DAG.getDataLayout();
+
+ unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace();
+ ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS));
+ } else {
+ ActualCallee = getValue(ISP.getCalledValue());
+ }
+
+ StatepointLoweringInfo SI(DAG);
+ populateCallLoweringInfo(SI.CLI, ISP.getCallSite(),
+ ImmutableStatepoint::CallArgsBeginPos,
+ ISP.getNumCallArgs(), ActualCallee,
+ ISP.getActualReturnType(), false /* IsPatchPoint */);
+
+ for (const GCRelocateInst *Relocate : ISP.getRelocates()) {
+ SI.GCRelocates.push_back(Relocate);
+ SI.Bases.push_back(Relocate->getBasePtr());
+ SI.Ptrs.push_back(Relocate->getDerivedPtr());
+ }
+
+ SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
+ SI.StatepointInstr = ISP.getInstruction();
+ SI.GCTransitionArgs =
+ ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
+ SI.ID = ISP.getID();
+ SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end());
+ SI.StatepointFlags = ISP.getFlags();
+ SI.NumPatchBytes = ISP.getNumPatchBytes();
+ SI.EHPadBB = EHPadBB;
+
+ SDValue ReturnValue = LowerAsSTATEPOINT(SI);
+
+ // Export the result value if needed
+ const GCResultInst *GCResult = ISP.getGCResult();
+ Type *RetTy = ISP.getActualReturnType();
+ if (!RetTy->isVoidTy() && GCResult) {
+ if (GCResult->getParent() != ISP.getCallSite().getParent()) {
+      // The result value will be used in a different basic block, so we need
+      // to export it now. The default exporting mechanism will not work here
+      // because the statepoint call has a different type than the actual
+      // call. This means that by default llvm would create an export register
+      // of the wrong type (always i32 in our case). So instead we create an
+      // export register with the correct type manually.
+ // TODO: To eliminate this problem we can remove gc.result intrinsics
+ // completely and make statepoint call to return a tuple.
+ unsigned Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy);
+ SDValue Chain = DAG.getEntryNode();
+
+ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+ FuncInfo.ValueMap[ISP.getInstruction()] = Reg;
+ } else {
+      // The result value will be used in the same basic block. Don't export
+      // it or perform any explicit register copies.
+      // We'll replace the actual call node shortly. gc_result will grab
+      // this value.
+ setValue(ISP.getInstruction(), ReturnValue);
+ }
+ } else {
+ // The token value is never used from here on, just generate a poison value
+ setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc()));
+ }
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
+ ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB,
+ bool VarArgDisallowed, bool ForceVoidReturnTy) {
+ StatepointLoweringInfo SI(DAG);
+ unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
+ populateCallLoweringInfo(
+ SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee,
+ ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(),
+ false);
+ if (!VarArgDisallowed)
+ SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg();
+
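+  // This lowering path is only used for call sites that carry a "deopt"
+  // operand bundle, so the Optional returned by getOperandBundle is expected
+  // to be populated here.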
+ auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);
+
+ unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
+
+ auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ SI.ID = SD.StatepointID.getValueOr(DefaultID);
+ SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
+
+ SI.DeoptState =
+ ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
+ SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None);
+ SI.EHPadBB = EHPadBB;
+
+ // NB! The GC arguments are deliberately left empty.
+
+ if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
+ const Instruction *Inst = CS.getInstruction();
+ ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
+ setValue(Inst, ReturnVal);
+ }
+}
+
+void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
+ ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) {
+ LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB,
+ /* VarArgDisallowed = */ false,
+ /* ForceVoidReturnTy = */ false);
+}
+
+void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
+ // The result value of the gc_result is simply the result of the actual
+ // call. We've already emitted this, so just grab the value.
+ const Instruction *I = CI.getStatepoint();
+
+ if (I->getParent() != CI.getParent()) {
+    // The statepoint is in a different basic block, so we should have stored
+    // the call result in a virtual register.
+    // We cannot use the default getValue() functionality to copy the value
+    // from this register because the statepoint and actual call return types
+    // can be different, and getValue() would use CopyFromReg of the wrong
+    // type, which is always i32 in our case.
+ PointerType *CalleeType = cast<PointerType>(
+ ImmutableStatepoint(I).getCalledValue()->getType());
+ Type *RetTy =
+ cast<FunctionType>(CalleeType->getElementType())->getReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
+ } else {
+ setValue(&CI, getValue(I));
+ }
+}
+
+void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
+#ifndef NDEBUG
+ // Consistency check
+  // We skip this check for relocates not in the same basic block as their
+ // statepoint. It would be too expensive to preserve validation info through
+ // different basic blocks.
+ if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
+ StatepointLowering.relocCallVisited(Relocate);
+
+ auto *Ty = Relocate.getType()->getScalarType();
+ if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
+ assert(*IsManaged && "Non gc managed pointer relocated!");
+#endif
+
+ const Value *DerivedPtr = Relocate.getDerivedPtr();
+ SDValue SD = getValue(DerivedPtr);
+
+ auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
+ auto SlotIt = SpillMap.find(DerivedPtr);
+  assert(SlotIt != SpillMap.end() && "Relocating an unlowered gc value");
+ Optional<int> DerivedPtrLocation = SlotIt->second;
+
+ // We didn't need to spill these special cases (constants and allocas).
+  // See the handling in spillIncomingValueForStatepoint for details.
+ if (!DerivedPtrLocation) {
+ setValue(&Relocate, SD);
+ return;
+ }
+
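+  // The value was spilled to a stack slot when the statepoint itself was
+  // lowered; rebuild a frame index for that slot and reload the relocated
+  // value from it below.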
+ SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation,
+ SD.getValueType());
+
+ // Be conservative: flush all pending loads
+ // TODO: Probably we can be less restrictive on this,
+ // it may allow more scheduling opportunities.
+ SDValue Chain = getRoot();
+
+ SDValue SpillLoad =
+ DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
+ *DerivedPtrLocation));
+
+ // Again, be conservative, don't emit pending loads
+ DAG.setRoot(SpillLoad.getValue(1));
+
+ assert(SpillLoad.getNode());
+ setValue(&Relocate, SpillLoad);
+}
+
+void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ // We don't lower calls to __llvm_deoptimize as varargs, but as a regular
+ // call. We also do not lower the return value to any virtual register, and
+ // change the immediately following return to a trap instruction.
+ LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr,
+ /* VarArgDisallowed = */ true,
+ /* ForceVoidReturnTy = */ true);
+}
+
+void SelectionDAGBuilder::LowerDeoptimizingReturn() {
+ // We do not lower the return value from llvm.deoptimize to any virtual
+ // register, and change the immediately following return to a trap
+ // instruction.
+ if (DAG.getTarget().Options.TrapUnreachable)
+ DAG.setRoot(
+ DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
new file mode 100644
index 000000000000..b043184003a0
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,118 @@
+//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+class SelectionDAGBuilder;
+
+/// This class tracks both per-statepoint and per-selectiondag information.
+/// For each statepoint it tracks the locations of its gc values (incoming
+/// and relocated) and the list of gcrelocate calls scheduled for visiting
+/// (this is used for a debug-mode consistency check only). The spill slot
+/// tracking works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+ StatepointLoweringState() : NextSlotToAllocate(0) {}
+
+ /// Reset all state tracking for a newly encountered safepoint. Also
+ /// performs some consistency checking.
+ void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+ /// Clear the memory usage of this object. This is called from
+ /// SelectionDAGBuilder::clear. We require this is never called in the
+ /// midst of processing a statepoint sequence.
+ void clear();
+
+ /// Returns the spill location of a value incoming to the current
+ /// statepoint. Will return SDValue() if this value hasn't been
+ /// spilled. Otherwise, the value has already been spilled and no
+ /// further action is required by the caller.
+ SDValue getLocation(SDValue Val) {
+ auto I = Locations.find(Val);
+ if (I == Locations.end())
+ return SDValue();
+ return I->second;
+ }
+
+ void setLocation(SDValue Val, SDValue Location) {
+ assert(!Locations.count(Val) &&
+ "Trying to allocate already allocated location");
+ Locations[Val] = Location;
+ }
+
+  /// Record the fact that we expect to encounter a given gc_relocate
+  /// before the next statepoint. If we don't see it, we'll trigger an
+  /// assertion failure.
+ void scheduleRelocCall(const CallInst &RelocCall) {
+ PendingGCRelocateCalls.push_back(&RelocCall);
+ }
+
+  /// Remove this gc_relocate from the list we're expecting to see
+  /// before the next statepoint. If we weren't expecting to see it,
+  /// we'll trigger an assertion failure.
+ void relocCallVisited(const CallInst &RelocCall) {
+ auto I = find(PendingGCRelocateCalls, &RelocCall);
+ assert(I != PendingGCRelocateCalls.end() &&
+ "Visited unexpected gcrelocate call");
+ PendingGCRelocateCalls.erase(I);
+ }
+
+ // TODO: Should add consistency tracking to ensure we encounter
+ // expected gc_result calls too.
+
+  /// Get a stack slot we can use to store a value of type ValueType. This
+  /// will hopefully be a recycled slot from another statepoint.
+ SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+ void reserveStackSlot(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ assert(!AllocatedStackSlots.test(Offset) && "already reserved!");
+ assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+ AllocatedStackSlots.set(Offset);
+ }
+
+ bool isStackSlotAllocated(int Offset) {
+ assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+ "out of bounds");
+ return AllocatedStackSlots.test(Offset);
+ }
+
+private:
+  /// Maps a pre-relocation value (a gc pointer incoming directly into the
+  /// statepoint) to its location (currently only stack slots).
+ DenseMap<SDValue, SDValue> Locations;
+
+ /// A boolean indicator for each slot listed in the FunctionInfo as to
+ /// whether it has been used in the current statepoint. Since we try to
+ /// preserve stack slots across safepoints, there can be gaps in which
+ /// slots have been allocated.
+ SmallBitVector AllocatedStackSlots;
+
+ /// Points just beyond the last slot known to have been allocated
+ unsigned NextSlotToAllocate;
+
+ /// Keep track of pending gcrelocate calls for consistency check
+ SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 000000000000..f2bc88a98597
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,3550 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cctype>
+using namespace llvm;
+
+/// NOTE: The TargetMachine owns TLOF.
+TargetLowering::TargetLowering(const TargetMachine &tm)
+ : TargetLoweringBase(tm) {}
+
+const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ return nullptr;
+}
+
+bool TargetLowering::isPositionIndependent() const {
+ return getTargetMachine().isPositionIndependent();
+}
+
+/// Check whether a given call node is in tail position within its function. If
+/// so, it sets Chain to the input chain of the tail call.
+bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ SDValue &Chain) const {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias).hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return isUsedByReturnOnly(Node, Chain);
+}
+
+bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
+ const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<SDValue> &OutVals) const {
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ const CCValAssign &ArgLoc = ArgLocs[I];
+ if (!ArgLoc.isRegLoc())
+ continue;
+ unsigned Reg = ArgLoc.getLocReg();
+ // Only look at callee saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
+ continue;
+ // Check that we pass the value used for the caller.
+ // (We look for a CopyFromReg reading a virtual register that is used
+ // for the function live-in value of register Reg)
+ SDValue Value = OutVals[I];
+ if (Value->getOpcode() != ISD::CopyFromReg)
+ return false;
+ unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ if (MRI.getLiveInPhysReg(ArgReg) != Reg)
+ return false;
+ }
+ return true;
+}
+
+/// \brief Set CallLoweringInfo attribute flags based on a call instruction
+/// and called function attributes.
+void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned AttrIdx) {
+ isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
+ isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+ isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+ isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+ isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+ isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+ isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
+ isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+ isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
+ isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
+ Alignment = CS->getParamAlignment(AttrIdx);
+}
+
+/// Generate a libcall taking the given operands as arguments and returning a
+/// result of type RetVT.
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
+ ArrayRef<SDValue> Ops, bool isSigned,
+ const SDLoc &dl, bool doesNotReturn,
+ bool isReturnValueUsed) const {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(Ops.size());
+
+ TargetLowering::ArgListEntry Entry;
+ for (SDValue Op : Ops) {
+ Entry.Node = Op;
+ Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Args.push_back(Entry);
+ }
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported library call operation!");
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
+ .setSExtResult(signExtend).setZExtResult(!signExtend);
+ return LowerCallTo(CLI);
+}
+
+/// Soften the operands of a comparison. This code is shared among BR_CC,
+/// SELECT_CC, and SETCC handlers.
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl) const {
+  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
+          VT == MVT::ppcf128) &&
+         "Unsupported setcc type!");
+
+ // Expand into one or more soft-fp libcall(s).
+ RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
+ bool ShouldInvertCC = false;
+ switch (CCCode) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
+ (VT == MVT::f64) ? RTLIB::UNE_F64 :
+ (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+ break;
+ case ISD::SETLT:
+ case ISD::SETOLT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ break;
+ case ISD::SETLE:
+ case ISD::SETOLE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUO:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+ break;
+ case ISD::SETO:
+ LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
+ (VT == MVT::f64) ? RTLIB::O_F64 :
+ (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
+ break;
+ case ISD::SETONE:
+ // SETONE = SETOLT | SETOGT
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUEQ:
+ LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
+ (VT == MVT::f64) ? RTLIB::UO_F64 :
+          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
+ LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
+ (VT == MVT::f64) ? RTLIB::OEQ_F64 :
+ (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
+ break;
+ default:
+ // Invert CC for unordered comparisons
+ ShouldInvertCC = true;
+ switch (CCCode) {
+ case ISD::SETULT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
+ (VT == MVT::f64) ? RTLIB::OGE_F64 :
+ (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
+ break;
+ case ISD::SETULE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
+ (VT == MVT::f64) ? RTLIB::OGT_F64 :
+ (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
+ break;
+ case ISD::SETUGT:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
+ (VT == MVT::f64) ? RTLIB::OLE_F64 :
+ (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
+ break;
+ case ISD::SETUGE:
+ LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
+ (VT == MVT::f64) ? RTLIB::OLT_F64 :
+ (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
+ break;
+ default: llvm_unreachable("Do not know how to soften this setcc!");
+ }
+ }
+
+  // Use the target specific return value for comparison libcalls.
+ EVT RetVT = getCmpLibcallReturnType();
+ SDValue Ops[2] = {NewLHS, NewRHS};
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
+ dl).first;
+ NewRHS = DAG.getConstant(0, dl, RetVT);
+
+ CCCode = getCmpLibcallCC(LC1);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
+
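+  // For the two-libcall cases (SETONE, SETUEQ) the predicate cannot be
+  // expressed as a single soft-float comparison, so the second call's result
+  // is compared against zero as well and the two setcc results are OR'd
+  // together below.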
+ if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+ SDValue Tmp = DAG.getNode(
+ ISD::SETCC, dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(CCCode));
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
+ dl).first;
+ NewLHS = DAG.getNode(
+ ISD::SETCC, dl,
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
+ NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
+ NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ NewRHS = SDValue();
+ }
+}
+
+/// Return the entry encoding for a jump table in the current function. The
+/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
+unsigned TargetLowering::getJumpTableEncoding() const {
+ // In non-pic modes, just use the address of a block.
+ if (!isPositionIndependent())
+ return MachineJumpTableInfo::EK_BlockAddress;
+
+ // In PIC mode, if the target supports a GPRel32 directive, use it.
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
+ return MachineJumpTableInfo::EK_GPRel32BlockAddress;
+
+ // Otherwise, use a label difference.
+ return MachineJumpTableInfo::EK_LabelDifference32;
+}
+
+SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ // If our PIC model is GP relative, use the global offset table as the base.
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
+ return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
+
+ return Table;
+}
+
+/// This returns the relocation base for the given PIC jumptable, the same as
+/// getPICJumpTableRelocBase, but as an MCExpr.
+const MCExpr *
+TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,MCContext &Ctx) const{
+ // The normal PIC reloc base is the label at the start of the jump table.
+ return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
+}
+
+bool
+TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ const TargetMachine &TM = getTargetMachine();
+ const GlobalValue *GV = GA->getGlobal();
+
+ // If the address is not even local to this DSO we will have to load it from
+ // a got and then add the offset.
+ if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return false;
+
+ // If the code is position independent we will have to add a base register.
+ if (isPositionIndependent())
+ return false;
+
+ // Otherwise we can do it.
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Optimization Methods
+//===----------------------------------------------------------------------===//
+
+/// Check to see if the specified operand of the specified instruction is a
+/// constant integer. If so, check to see if there are any bits set in the
+/// constant that are not demanded. If so, shrink the constant and return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+ const APInt &Demanded) {
+ SDLoc dl(Op);
+
+ // FIXME: ISD::SELECT, ISD::SELECT_CC
+ switch (Op.getOpcode()) {
+ default: break;
+ case ISD::XOR:
+ case ISD::AND:
+ case ISD::OR: {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C) return false;
+
+ if (Op.getOpcode() == ISD::XOR &&
+ (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ return false;
+
+ // if we can expand it to have all bits set, do it
+ if (C->getAPIntValue().intersects(~Demanded)) {
+ EVT VT = Op.getValueType();
+ SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
+ DAG.getConstant(Demanded &
+ C->getAPIntValue(),
+ dl, VT));
+ return CombineTo(Op, New);
+ }
+
+ break;
+ }
+ }
+
+ return false;
+}
+
+/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
+/// generalized for targets with other types of implicit widening casts.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
+ unsigned BitWidth,
+ const APInt &Demanded,
+ const SDLoc &dl) {
+ assert(Op.getNumOperands() == 2 &&
+ "ShrinkDemandedOp only supports binary operators!");
+ assert(Op.getNode()->getNumValues() == 1 &&
+ "ShrinkDemandedOp only supports nodes with one result!");
+
+ // Early return, as this function cannot handle vector types.
+ if (Op.getValueType().isVector())
+ return false;
+
+ // Don't do this if the node has another user, which may require the
+ // full value.
+ if (!Op.getNode()->hasOneUse())
+ return false;
+
+ // Search for the smallest integer type with free casts to and from
+ // Op's type. For expedience, just check power-of-2 integer types.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned SmallVTBits = DemandedSize;
+ if (!isPowerOf2_32(SmallVTBits))
+ SmallVTBits = NextPowerOf2(SmallVTBits);
+ for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
+ if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
+ TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ // We found a type with free casts.
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
+ Op.getNode()->getOperand(1)));
+ bool NeedZext = DemandedSize > SmallVTBits;
+ SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
+ dl, Op.getValueType(), X);
+ return CombineTo(Op, Z);
+ }
+ }
+ return false;
+}
+
+/// Look at Op. At this point, we know that only the DemandedMask bits of the
+/// result of Op are ever used downstream. If we can use this information to
+/// simplify Op, create a new simplified DAG node and return true, returning the
+/// original and new nodes in Old and New. Otherwise, analyze the expression and
+/// return a mask of KnownOne and KnownZero bits for the expression (used to
+/// simplify the caller). The KnownZero/One bits may only be accurate for those
+/// bits in the DemandedMask.
+bool TargetLowering::SimplifyDemandedBits(SDValue Op,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ TargetLoweringOpt &TLO,
+ unsigned Depth) const {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ "Mask size mismatches value type size!");
+ APInt NewMask = DemandedMask;
+ SDLoc dl(Op);
+ auto &DL = TLO.DAG.getDataLayout();
+
+ // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+
+ // Other users may use these bits.
+ if (!Op.getNode()->hasOneUse()) {
+ if (Depth != 0) {
+      // If not at the root, just compute the KnownZero/KnownOne bits to
+ // simplify things downstream.
+ TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ return false;
+ }
+ // If this is the root being simplified, allow it to have multiple uses,
+ // just set the NewMask to all bits.
+ NewMask = APInt::getAllOnesValue(BitWidth);
+ } else if (DemandedMask == 0) {
+ // Not demanding any bits from Op.
+ if (!Op.isUndef())
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
+ return false;
+ } else if (Depth == 6) { // Limit search depth.
+ return false;
+ }
+
+ APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut;
+ switch (Op.getOpcode()) {
+ case ISD::Constant:
+ // We know all of the bits for a constant!
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::AND:
+ // If the RHS is a constant, check to see if the LHS would be zero without
+ // using the bits from the RHS. Below, we use knowledge about the RHS to
+    // simplify the LHS; here we're using information from the LHS to simplify
+ // the RHS.
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
+ TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+ // If the LHS already has zeros where RHSC does, this and is dead.
+ if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ // If any of the set bits in the RHS are known zero on the LHS, shrink
+ // the constant.
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ return true;
+ }
+
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known one on one side, return the other.
+ // These bits cannot contribute to the result of the 'and'.
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the demanded bits in the inputs are known zeros, return zero.
+ if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ KnownOne &= KnownOne2;
+    // Output known-0 bits are known to be clear if zero in either LHS | RHS.
+ KnownZero |= KnownZero2;
+ break;
+ case ISD::OR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
+ KnownZero2, KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'or'.
+ if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the RHS is a constant, see if we can simplify it.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ KnownZero &= KnownZero2;
+    // Output known-1 bits are known to be set if set in either LHS | RHS.
+ KnownOne |= KnownOne2;
+ break;
+ case ISD::XOR:
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If all of the demanded bits are known zero on one side, return the other.
+ // These bits cannot contribute to the result of the 'xor'.
+ if ((KnownZero & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+ if ((KnownZero2 & NewMask) == NewMask)
+ return TLO.CombineTo(Op, Op.getOperand(1));
+ // If the operation can be done in a smaller type, do so.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+
+ // If all of the unknown bits are known to be zero on one side or the other
+ // (but not both) turn this into an *inclusive* or.
+ // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+ if ((NewMask & ~KnownZero & ~KnownZero2) == 0)
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1)));
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are set if set in only one of the LHS and RHS.
+ KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+
+ // If all of the demanded bits on one side are known, and all of the set
+ // bits on that side are also known to be set on the other side, turn this
+ // into an AND, as we know the bits will be cleared.
+ // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
+ // NB: it is okay if more bits are known than are requested
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
+ if (KnownOne == KnownOne2) { // set bits are the same on both sides
+ EVT VT = Op.getValueType();
+ SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ Op.getOperand(0), ANDC));
+ }
+ }
+
+ // If the RHS is a constant, see if we can simplify it.
+    // For XOR, we prefer to force bits to 1 if they will make a -1.
+    // If we can't force bits, try to shrink the constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ EVT VT = Op.getValueType();
+ SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, dl, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
+ KnownZero = KnownZeroOut;
+ KnownOne = KnownOneOut;
+ break;
+ case ISD::SELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SELECT_CC:
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ return true;
+
+ // Only known if known in both the LHS and RHS.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ case ISD::SHL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ unsigned ShAmt = SA->getZExtValue();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the bottom bits (which are shifted
+ // out) are never demanded.
+ if (InOp.getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy(InnerVT, DL);
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ // Repeat the SHL optimization above in cases where an extension
+ // intervenes: (shl (anyext (shr x, c1)), c2) to
+ // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+ // aren't demanded (as above) and that the shifted upper c1 bits of
+ // x aren't demanded.
+ if (InOp.hasOneUse() &&
+ InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse() &&
+ isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+ uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+ ->getZExtValue();
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+ NewMask.trunc(ShAmt) == 0) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
+ }
+ }
+
+ KnownZero <<= SA->getZExtValue();
+ KnownOne <<= SA->getZExtValue();
+ // low bits known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ }
+ break;
+ case ISD::SRL:
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+ unsigned VTSize = VT.getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
+ InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+
+ // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
+ // single shift. We can do this if the top bits (which are shifted out)
+ // are never demanded.
+ if (InOp.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
+ unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
+
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0), NewSA));
+ }
+ }
+
+ // Compute the new bits that are at the top now.
+ if (SimplifyDemandedBits(InOp, InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ KnownZero |= HighBits; // High bits known zero.
+ }
+ break;
+ case ISD::SRA:
+ // If this is an arithmetic shift right and only the low-bit is set, we can
+ // always convert this into a logical shr, even if the shift amount is
+ // variable. The low bit of the shift cannot be an input sign bit unless
+ // the shift amount is >= the size of the datatype, which is undefined.
+ if (NewMask == 1)
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1)));
+
+ if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ EVT VT = Op.getValueType();
+ unsigned ShAmt = SA->getZExtValue();
+
+ // If the shift count is an invalid immediate, don't do anything.
+ if (ShAmt >= BitWidth)
+ break;
+
+ APInt InDemandedMask = (NewMask << ShAmt);
+
+ // If the shift is exact, then it does demand the low bits (and knows that
+ // they are zero).
+ if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
+ InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
+ if (HighBits.intersects(NewMask))
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+
+ // Handle the sign bit, adjusted to where it is now in the mask.
+ APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
+
+ // If the input sign bit is known to be zero, or if none of the top bits
+ // are demanded, turn this into an unsigned shift right.
+ if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+ SDNodeFlags Flags;
+ Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact());
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
+ Op.getOperand(1), &Flags));
+ }
+
+ int Log2 = NewMask.exactLogBase2();
+ if (Log2 >= 0) {
+ // The bit must come from the sign.
+ SDValue NewSA =
+ TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
+ Op.getOperand(1).getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ Op.getOperand(0), NewSA));
+ }
+
+ if (KnownOne.intersects(SignBit))
+ // New bits are known one.
+ KnownOne |= HighBits;
+ }
+ break;
+ case ISD::SIGN_EXTEND_INREG: {
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == NewMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+ unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ bool AlreadySignExtended =
+ TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ // However if the input is already sign extended we expect the sign
+ // extension to be dropped altogether later and do not simplify.
+ if (!AlreadySignExtended) {
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
+ ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp,
+ ShiftAmt));
+ }
+ }
+
+ // Sign extension. Compute the demanded bits in the result that are not
+ // present in the input.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth - ExVT.getScalarType().getSizeInBits());
+
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ if ((NewBits & NewMask) == 0)
+ return TLO.CombineTo(Op, Op.getOperand(0));
+
+ APInt InSignBit =
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt InputDemandedBits =
+ APInt::getLowBitsSet(BitWidth,
+ ExVT.getScalarType().getSizeInBits()) &
+ NewMask;
+
+ // Since the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InputDemandedBits |= InSignBit;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ // If the sign bit of the input is known set or clear, then we know the
+ // top bits of the result.
+
+ // If the input sign bit is known zero, convert this into a zero extension.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
+
+ if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
+ KnownOne |= NewBits;
+ KnownZero &= ~NewBits;
+ } else { // Input sign bit unknown
+ KnownZero &= ~NewBits;
+ KnownOne &= ~NewBits;
+ }
+ break;
+ }
+ case ISD::BUILD_PAIR: {
+ EVT HalfVT = Op.getOperand(0).getValueType();
+ unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
+
+ APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
+
+ APInt KnownZeroLo, KnownOneLo;
+ APInt KnownZeroHi, KnownOneHi;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo,
+ KnownOneLo, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi,
+ KnownOneHi, TLO, Depth + 1))
+ return true;
+
+ KnownZero = KnownZeroLo.zext(BitWidth) |
+ KnownZeroHi.zext(BitWidth).shl(HalfBitWidth);
+
+ KnownOne = KnownOneLo.zext(BitWidth) |
+ KnownOneHi.zext(BitWidth).shl(HalfBitWidth);
+ break;
+ }
+ case ISD::ZERO_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ APInt NewBits =
+ APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
+ if (!NewBits.intersects(NewMask))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero |= NewBits;
+ break;
+ }
+ case ISD::SIGN_EXTEND: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
+ APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
+ APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt NewBits = ~InMask & NewMask;
+
+ // If none of the top bits are demanded, convert this into an any_extend.
+ if (NewBits == 0)
+ return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ APInt InDemandedBits = InMask & NewMask;
+ InDemandedBits |= InSignBit;
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
+ // If the sign bit is known zero, convert this to a zero extend.
+ if (KnownZero.intersects(InSignBit))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
+ Op.getValueType(),
+ Op.getOperand(0)));
+
+ // If the sign bit is known one, the top bits match.
+ if (KnownOne.intersects(InSignBit)) {
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
+ } else { // Otherwise, top bits aren't known.
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
+ }
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), InMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ break;
+ }
+ case ISD::TRUNCATE: {
+ // Simplify the input, using demanded bit information, and compute the known
+ // zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
+ if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
+ // If the input is only used by this truncate, see if we can shrink it based
+ // on the known demanded bits.
+ if (Op.getOperand(0).getNode()->hasOneUse()) {
+ SDValue In = Op.getOperand(0);
+ switch (In.getOpcode()) {
+ default: break;
+ case ISD::SRL:
+ // Shrink SRL by a constant if none of the high bits shifted in are
+ // demanded.
+ if (TLO.LegalTypes() &&
+ !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+ // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+ // undesirable.
+ break;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+ if (!ShAmt)
+ break;
+ SDValue Shift = In.getOperand(1);
+ if (TLO.LegalTypes()) {
+ uint64_t ShVal = ShAmt->getZExtValue();
+ Shift = TLO.DAG.getConstant(ShVal, dl,
+ getShiftAmountTy(Op.getValueType(), DL));
+ }
+
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
+
+ if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
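+ // Illustrative: for (i32 truncate (i64 srl X, 4)), only result bits 28-31
+ // are fed from the high half of X; if those are not demanded, it is safe
+ // to truncate X to i32 first and shift the narrower value by 4.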
+ SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+ Op.getValueType(),
+ In.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+ Op.getValueType(),
+ NewTrunc,
+ Shift));
+ }
+ break;
+ }
+ }
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ break;
+ }
+ case ISD::AssertZext: {
+ // AssertZext demands all of the high bits, plus any of the low bits
+ // demanded by its users.
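+ // Illustrative: an AssertZext i8 of an i32 value demands bits 8-31 of its
+ // operand unconditionally (they are asserted to be zero), plus whatever
+ // subset of bits 0-7 NewMask says the users need.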
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
+ KnownZero, KnownOne, TLO, Depth+1))
+ return true;
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ KnownZero |= ~InMask & NewMask;
+ break;
+ }
+ case ISD::BITCAST:
+ // If this is an FP->Int bitcast and if the sign bit is the only
+ // thing demanded, turn this into a FGETSIGN.
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
+ bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
+ bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
+ if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() &&
+ Op.getOperand(0).getValueType() != MVT::f128) {
+ // Cannot eliminate/lower SHL for f128 yet.
+ EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32;
+ // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+ // place. We expect the SHL to be eliminated by other optimizations.
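+ // Illustrative: for (i64 bitcast (f64 X)) where only bit 63 is demanded,
+ // this emits FGETSIGN of X (extended if needed) shifted left by 63; later
+ // combines are expected to fold the shift away.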
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
+ unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ if (!OpVTLegal && OpVTSizeInBits > 32)
+ Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
+ unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(),
+ Sign, ShAmt));
+ }
+ }
+ break;
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::SUB: {
+ // Add, Sub, and Mul don't demand any bits in positions beyond that
+ // of the highest bit demanded of them.
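+ // Illustrative: if only the low 8 bits of (add X, Y) are demanded, only
+ // the low 8 bits of X and Y matter; carries propagate from low bits to
+ // high bits, never the other way around.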
+ APInt LoMask = APInt::getLowBitsSet(BitWidth,
+ BitWidth - NewMask.countLeadingZeros());
+ if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1))
+ return true;
+ // See if the operation should be performed at a smaller bit width.
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ return true;
+ }
+ // FALL THROUGH
+ default:
+ // Just use computeKnownBits to compute output bits.
+ TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ break;
+ }
+
+ // If we know the value of all of the demanded bits, return this as a
+ // constant.
+ if ((NewMask & (KnownZero|KnownOne)) == NewMask) {
+ // Avoid folding to a constant if any OpaqueConstant is involved.
+ const SDNode *N = Op.getNode();
+ for (SDNodeIterator I = SDNodeIterator::begin(N),
+ E = SDNodeIterator::end(N); I != E; ++I) {
+ SDNode *Op = *I;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->isOpaque())
+ return false;
+ }
+ return TLO.CombineTo(Op,
+ TLO.DAG.getConstant(KnownOne, dl, Op.getValueType()));
+ }
+
+ return false;
+}
+
+/// Determine which of the bits specified in Mask are known to be either zero or
+/// one and return them in the KnownZero/KnownOne bitsets.
+void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use MaskedValueIsZero if you don't know whether Op"
+ " is a target node!");
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
+}
+
+/// This method can be implemented by targets that want to expose additional
+/// information about sign bits to the DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const SelectionDAG &,
+ unsigned Depth) const {
+ assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use ComputeNumSignBits if you don't know whether Op"
+ " is a target node!");
+ return 1;
+}
+
+bool TargetLowering::isConstTrueVal(const SDNode *N) const {
+ if (!N)
+ return false;
+
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) {
+ const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ BitVector UndefElements;
+ CN = BV->getConstantSplatNode(&UndefElements);
+ // Only interested in constant splats, and we don't try to handle undef
+ // elements in identifying boolean constants.
+ if (!CN || UndefElements.any())
+ return false;
+ }
+
+ switch (getBooleanContents(N->getValueType(0))) {
+ case UndefinedBooleanContent:
+ return CN->getAPIntValue()[0];
+ case ZeroOrOneBooleanContent:
+ return CN->isOne();
+ case ZeroOrNegativeOneBooleanContent:
+ return CN->isAllOnesValue();
+ }
+
+ llvm_unreachable("Invalid boolean contents");
+}
+
+bool TargetLowering::isConstFalseVal(const SDNode *N) const {
+ if (!N)
+ return false;
+
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+ if (!CN) {
+ const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+ if (!BV)
+ return false;
+
+ BitVector UndefElements;
+ CN = BV->getConstantSplatNode(&UndefElements);
+ // Only interested in constant splats, and we don't try to handle undef
+ // elements in identifying boolean constants.
+ if (!CN || UndefElements.any())
+ return false;
+ }
+
+ if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
+ return !CN->getAPIntValue()[0];
+
+ return CN->isNullValue();
+}
+
+bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
+ bool SExt) const {
+ if (VT == MVT::i1)
+ return N->isOne();
+
+ TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
+ switch (Cnt) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ // An extended value of 1 is always true, unless its original type is i1,
+ // in which case it will be sign extended to -1.
+ return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return N->isAllOnesValue() && SExt;
+ }
+ llvm_unreachable("Unexpected enumeration.");
+}
+
+/// This helper function of SimplifySetCC tries to optimize the comparison when
+/// either operand of the SetCC node is a bitwise-and instruction.
+SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ // Match these patterns in any of their permutations:
+ // (X & Y) == Y
+ // (X & Y) != Y
+ if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
+ std::swap(N0, N1);
+
+ EVT OpVT = N0.getValueType();
+ if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
+ (Cond != ISD::SETEQ && Cond != ISD::SETNE))
+ return SDValue();
+
+ SDValue X, Y;
+ if (N0.getOperand(0) == N1) {
+ X = N0.getOperand(1);
+ Y = N0.getOperand(0);
+ } else if (N0.getOperand(1) == N1) {
+ X = N0.getOperand(0);
+ Y = N0.getOperand(1);
+ } else {
+ return SDValue();
+ }
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ if (DAG.isKnownToBeAPowerOfTwo(Y)) {
+ // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
+ // Note that where Y is variable and is known to have at most one bit set
+ // (for example, if it is Z & 1) we cannot do this; the expressions are not
+ // equivalent when Y == 0.
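+ // Illustrative: with Y == 8, (X & 8) == 8 becomes (X & 8) != 0, which
+ // many targets can select as a single bit test.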
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(Cond, N0.getSimpleValueType()))
+ return DAG.getSetCC(DL, VT, N0, Zero, Cond);
+ } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
+ // If the target supports an 'and-not' or 'and-complement' logic operation,
+ // try to use that to make a comparison operation more efficient.
+ // But don't do this transform if the mask is a single bit because there are
+ // more efficient ways to deal with that case (for example, 'bt' on x86 or
+ // 'rlwinm' on PPC).
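+ // Illustrative: (X & Mask) == Mask becomes (~X & Mask) == 0, which a
+ // target with an and-not instruction (e.g. ANDN with BMI on x86, or BIC
+ // on ARM) can lower to one logic op plus a compare against zero.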
+
+ // Bail out if the compare operand that we want to turn into a zero is
+ // already a zero (otherwise, infinite loop).
+ auto *YConst = dyn_cast<ConstantSDNode>(Y);
+ if (YConst && YConst->isNullValue())
+ return SDValue();
+
+ // Transform this into: ~X & Y == 0.
+ SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
+ return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
+ }
+
+ return SDValue();
+}
+
+/// Try to simplify a setcc built with the specified operands and cc. If it is
+/// unable to simplify it, return a null SDValue.
+SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, bool foldBooleans,
+ DAGCombinerInfo &DCI,
+ const SDLoc &dl) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // These setcc operations always fold.
+ switch (Cond) {
+ default: break;
+ case ISD::SETFALSE:
+ case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
+ case ISD::SETTRUE:
+ case ISD::SETTRUE2: {
+ TargetLowering::BooleanContent Cnt =
+ getBooleanContents(N0->getValueType(0));
+ return DAG.getConstant(
+ Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
+ VT);
+ }
+ }
+
+ // Ensure that the constant occurs on the RHS, and fold constant
+ // comparisons.
+ ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+ if (isa<ConstantSDNode>(N0.getNode()) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
+
+ // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+ // equality comparison, then we're just comparing whether X itself is
+ // zero.
+ if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+ N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ const APInt &ShAmt
+ = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+ Zero, Cond);
+ }
+ }
+
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, dl, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
+ // (zext x) == C --> x == (trunc C)
+ // (sext x) == C --> x == (trunc C)
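+ // Illustrative: (i32 zero_extend (i8 X)) == 42 can be tested as
+ // (i8 X) == 42 because 42 is representable in 8 bits; the
+ // MinBits >= ReqdBits check below rejects constants that are not.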
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ DCI.isBeforeLegalize() && N0->hasOneUse()) {
+ unsigned MinBits = N0.getValueSizeInBits();
+ SDValue PreExt;
+ bool Signed = false;
+ if (N0->getOpcode() == ISD::ZERO_EXTEND) {
+ // ZExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ } else if (N0->getOpcode() == ISD::AND) {
+ // DAGCombine turns costly ZExts into ANDs
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
+ if ((C->getAPIntValue()+1).isPowerOf2()) {
+ MinBits = C->getAPIntValue().countTrailingOnes();
+ PreExt = N0->getOperand(0);
+ }
+ } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
+ // SExt
+ MinBits = N0->getOperand(0).getValueSizeInBits();
+ PreExt = N0->getOperand(0);
+ Signed = true;
+ } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
+ // ZEXTLOAD / SEXTLOAD
+ if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
+ } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
+ Signed = true;
+ MinBits = LN0->getMemoryVT().getSizeInBits();
+ PreExt = N0;
+ }
+ }
+
+ // Figure out how many bits we need to preserve this constant.
+ unsigned ReqdBits = Signed ?
+ C1.getBitWidth() - C1.getNumSignBits() + 1 :
+ C1.getActiveBits();
+
+ // Make sure we're not losing bits from the constant.
+ if (MinBits > 0 &&
+ MinBits < C1.getBitWidth() &&
+ MinBits >= ReqdBits) {
+ EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
+ if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
+ // Will get folded away.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
+ SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
+ return DAG.getSetCC(dl, VT, Trunc, C, Cond);
+ }
+
+ // If truncating the setcc operands is not desirable, we can still
+ // simplify the expression in some cases:
+ // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
+ // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
+ // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
+ // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
+ // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
+ SDValue TopSetCC = N0->getOperand(0);
+ unsigned N0Opc = N0->getOpcode();
+ bool SExt = (N0Opc == ISD::SIGN_EXTEND);
+ if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
+ TopSetCC.getOpcode() == ISD::SETCC &&
+ (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
+ (isConstFalseVal(N1C) ||
+ isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
+
+ bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
+ (!N1C->isNullValue() && Cond == ISD::SETNE);
+
+ if (!Inverse)
+ return TopSetCC;
+
+ ISD::CondCode InvCond = ISD::getSetCCInverse(
+ cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
+ TopSetCC.getOperand(0).getValueType().isInteger());
+ return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
+ TopSetCC.getOperand(1),
+ InvCond);
+
+ }
+ }
+ }
+
+ // If the LHS is '(and load, const)', the RHS is 0,
+ // the test is for equality or unsigned, and all 1 bits of the const are
+ // in the same partial word, see if we can shorten the load.
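+ // Illustrative: ((i32 load P) & 0xFF00) == 0 can be narrowed to an i8
+ // load of the byte holding bits 8-15 (offset 1 on a little-endian
+ // target), masked with 0xFF and compared against zero.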
+ if (DCI.isBeforeLegalize() &&
+ !ISD::isSignedIntSetCC(Cond) &&
+ N0.getOpcode() == ISD::AND && C1 == 0 &&
+ N0.getNode()->hasOneUse() &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(0).getNode()->hasOneUse() &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+ APInt bestMask;
+ unsigned bestWidth = 0, bestOffset = 0;
+ if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned maskWidth = origWidth;
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+ origWidth = Lod->getMemoryVT().getSizeInBits();
+ const APInt &Mask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+ APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+ for (unsigned offset=0; offset<origWidth/width; offset++) {
+ if ((newMask & Mask) == Mask) {
+ if (!DAG.getDataLayout().isLittleEndian())
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
+ else
+ bestOffset = (uint64_t)offset * (width/8);
+ bestMask = Mask.lshr(offset * (width/8) * 8);
+ bestWidth = width;
+ break;
+ }
+ newMask = newMask << width;
+ }
+ }
+ }
+ if (bestWidth) {
+ EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
+ if (newVT.isRound()) {
+ EVT PtrType = Lod->getOperand(1).getValueType();
+ SDValue Ptr = Lod->getBasePtr();
+ if (bestOffset != 0)
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+ DAG.getConstant(bestOffset, dl, PtrType));
+ unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+ SDValue NewLoad = DAG.getLoad(
+ newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
+ return DAG.getSetCC(dl, VT,
+ DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+ DAG.getConstant(bestMask.trunc(bestWidth),
+ dl, newVT)),
+ DAG.getConstant(0LL, dl, newVT), Cond);
+ }
+ }
+ }
+
+ // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+ // If the comparison constant has bits in the upper part, the
+ // zero-extended value could never match.
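+ // Illustrative: (i32 zero_extend (i8 X)) == 0x100 is always false, so the
+ // equality and the unsigned orderings fold directly to constants here.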
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+ C1.getBitWidth() - InSize))) {
+ switch (Cond) {
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETEQ: return DAG.getConstant(0, dl, VT);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETNE: return DAG.getConstant(1, dl, VT);
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // True if the sign bit of C1 is set.
+ return DAG.getConstant(C1.isNegative(), dl, VT);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // True if the sign bit of C1 isn't set.
+ return DAG.getConstant(C1.isNonNegative(), dl, VT);
+ default:
+ break;
+ }
+ }
+
+ // Otherwise, we can perform the comparison with the low bits.
+ switch (Cond) {
+ case ISD::SETEQ:
+ case ISD::SETNE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE: {
+ EVT newVT = N0.getOperand(0).getValueType();
+ if (DCI.isBeforeLegalizeOps() ||
+ (isOperationLegal(ISD::SETCC, newVT) &&
+ getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
+ EVT NewSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
+ SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
+
+ SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
+ NewConst, Cond);
+ return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
+ }
+ break;
+ }
+ default:
+ break; // TODO: be more careful with signed comparisons
+ }
+ } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+ unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+ EVT ExtDstTy = N0.getValueType();
+ unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
+ return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
+
+ SDValue ZextOp;
+ EVT Op0Ty = N0.getOperand(0).getValueType();
+ if (Op0Ty == ExtSrcTy) {
+ ZextOp = N0.getOperand(0);
+ } else {
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+ DAG.getConstant(Imm, dl, Op0Ty));
+ }
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(ZextOp.getNode());
+ // Otherwise, make this a use of a zext.
+ return DAG.getSetCC(dl, VT, ZextOp,
+ DAG.getConstant(C1 & APInt::getLowBitsSet(
+ ExtDstTyBits,
+ ExtSrcTyBits),
+ dl, ExtDstTy),
+ Cond);
+ } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+ if (TrueWhenTrue)
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ // Invert the condition.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ CC = ISD::getSetCCInverse(CC,
+ N0.getOperand(0).getValueType().isInteger());
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+ }
+
+ if ((N0.getOpcode() == ISD::XOR ||
+ (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR &&
+ N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
+ // can only do this if the top bits are known zero.
+ unsigned BitWidth = N0.getValueSizeInBits();
+ if (DAG.MaskedValueIsZero(N0,
+ APInt::getHighBitsSet(BitWidth,
+ BitWidth-1))) {
+ // Okay, get the un-inverted input value.
+ SDValue Val;
+ if (N0.getOpcode() == ISD::XOR)
+ Val = N0.getOperand(0);
+ else {
+ assert(N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::XOR);
+ // ((X^1)&1)^1 -> X & 1
+ Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+ N0.getOperand(0).getOperand(0),
+ N0.getOperand(1));
+ }
+
+ return DAG.getSetCC(dl, VT, Val, N1,
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents(N0->getValueType(0)) ==
+ ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ }
+ if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType().bitsGT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, dl, VT));
+ else if (Op0.getValueType().bitsLT(VT))
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, dl, VT));
+
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ if (Op0.getOpcode() == ISD::AssertZext &&
+ cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, dl, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
+ }
+
+ APInt MinVal, MaxVal;
+ unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+ if (ISD::isSignedIntSetCC(Cond)) {
+ MinVal = APInt::getSignedMinValue(OperandBitSize);
+ MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+ } else {
+ MinVal = APInt::getMinValue(OperandBitSize);
+ MaxVal = APInt::getMaxValue(OperandBitSize);
+ }
+
+ // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+ if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+ if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true
+ // X >= C0 --> X > (C0 - 1)
+ APInt C = C1 - 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+
+ if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+ if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true
+ // X <= C0 --> X < (C0 + 1)
+ APInt C = C1 + 1;
+ ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+ if ((DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+ (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+ isLegalICmpImmediate(C.getSExtValue())))) {
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(C, dl, N1.getValueType()),
+ NewCC);
+ }
+ }
+
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+ return DAG.getConstant(0, dl, VT); // X < MIN --> false
+ if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+ return DAG.getConstant(1, dl, VT); // X >= MIN --> true
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+ return DAG.getConstant(0, dl, VT); // X > MAX --> false
+ if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+ return DAG.getConstant(1, dl, VT); // X <= MAX --> true
+
+ // Canonicalize setgt X, Min --> setne X, Min
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+ // Canonicalize setlt X, Max --> setne X, Max
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+ // If we have setult X, 1, turn it into seteq X, 0
+ if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+ // If we have setugt X, Max-1, turn it into seteq X, Max
+ if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MaxVal, dl, N0.getValueType()),
+ ISD::SETEQ);
+
+ // If we have "setcc X, C0", check to see if we can shrink the immediate
+ // by changing cc.
+
+ // SETUGT X, SINTMAX -> SETLT X, 0
+ if (Cond == ISD::SETUGT &&
+ C1 == APInt::getSignedMaxValue(OperandBitSize))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(0, dl, N1.getValueType()),
+ ISD::SETLT);
+
+ // SETULT X, SINTMIN -> SETGT X, -1
+ if (Cond == ISD::SETULT &&
+ C1 == APInt::getSignedMinValue(OperandBitSize)) {
+ SDValue ConstMinusOne =
+ DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
+ N1.getValueType());
+ return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+ }
+
+ // Fold bit comparisons when we can.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (VT == N0.getValueType() ||
+ (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ N0.getOpcode() == ISD::AND) {
+ auto &DL = DAG.getDataLayout();
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ EVT ShiftTy = DCI.isBeforeLegalize()
+ ? getPointerTy(DL)
+ : getShiftAmountTy(N0.getValueType(), DL);
+ if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
+ // Perform the xform if the AND RHS is a single bit.
+ if (AndRHS->getAPIntValue().isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
+ ShiftTy)));
+ }
+ } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+ // (X & 8) == 8 --> (X & 8) >> 3
+ // Perform the xform if C1 is a single bit.
+ if (C1.isPowerOf2()) {
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+ DAG.getConstant(C1.logBase2(), dl,
+ ShiftTy)));
+ }
+ }
+ }
+ }
+
+ if (C1.getMinSignedBits() <= 64 &&
+ !isLegalICmpImmediate(C1.getSExtValue())) {
+ // (X & -256) == 256 -> (X >> 8) == 1
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ const APInt &AndRHSC = AndRHS->getAPIntValue();
+ if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ auto &DL = DAG.getDataLayout();
+ EVT ShiftTy = DCI.isBeforeLegalize()
+ ? getPointerTy(DL)
+ : getShiftAmountTy(N0.getValueType(), DL);
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl,
+ ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
+ }
+ } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
+ Cond == ISD::SETULE || Cond == ISD::SETUGT) {
+ bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
+ // X < 0x100000000 -> (X >> 32) < 1
+ // X >= 0x100000000 -> (X >> 32) >= 1
+ // X <= 0x0ffffffff -> (X >> 32) < 1
+ // X > 0x0ffffffff -> (X >> 32) >= 1
+ unsigned ShiftBits;
+ APInt NewC = C1;
+ ISD::CondCode NewCond = Cond;
+ if (AdjOne) {
+ ShiftBits = C1.countTrailingOnes();
+ NewC = NewC + 1;
+ NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ } else {
+ ShiftBits = C1.countTrailingZeros();
+ }
+ NewC = NewC.lshr(ShiftBits);
+ if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
+ isLegalICmpImmediate(NewC.getSExtValue())) {
+ auto &DL = DAG.getDataLayout();
+ EVT ShiftTy = DCI.isBeforeLegalize()
+ ? getPointerTy(DL)
+ : getShiftAmountTy(N0.getValueType(), DL);
+ EVT CmpTy = N0.getValueType();
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
+ }
+ }
+ }
+ }
+
+ if (isa<ConstantFPSDNode>(N0.getNode())) {
+ // Constant fold or commute setcc.
+ SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+ if (O.getNode()) return O;
+ } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+ // If the RHS of an FP comparison is a constant, simplify it away in
+ // some cases.
+ if (CFP->getValueAPF().isNaN()) {
+ // If an operand is known to be a nan, we can fold it.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default: llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return DAG.getConstant(0, dl, VT);
+ case 1: // Known true.
+ return DAG.getConstant(1, dl, VT);
+ case 2: // Undefined.
+ return DAG.getUNDEF(VT);
+ }
+ }
+
+ // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
+ // constant if knowing that the operand is non-nan is enough. We prefer to
+ // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+ // materialize 0.0.
+ if (Cond == ISD::SETO || Cond == ISD::SETUO)
+ return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+ // If the condition is not legal, see if we can find an equivalent one
+ // which is legal.
+ if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+ // If the comparison was an awkward floating-point == or != and one of
+ // the comparison operands is infinity or negative infinity, convert the
+ // condition to a less-awkward <= or >=.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+ } else {
+ if (Cond == ISD::SETOEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+ if (Cond == ISD::SETUEQ &&
+ isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+ if (Cond == ISD::SETUNE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+ if (Cond == ISD::SETONE &&
+ isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
+ return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+ }
+ }
+ }
+ }
+
+ if (N0 == N1) {
+ // The sext(setcc()) => setcc() optimization relies on the appropriate
+ // constant being emitted.
+ uint64_t EqVal = 0;
+ switch (getBooleanContents(N0.getValueType())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond);
+ break;
+ case ZeroOrNegativeOneBooleanContent:
+ EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0;
+ break;
+ }
+
+ // We can always fold X == X for integer setcc's.
+ if (N0.getValueType().isInteger()) {
+ return DAG.getConstant(EqVal, dl, VT);
+ }
+ unsigned UOF = ISD::getUnorderedFlavor(Cond);
+ if (UOF == 2) // FP operators that are undefined on NaNs.
+ return DAG.getConstant(EqVal, dl, VT);
+ if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+ return DAG.getConstant(EqVal, dl, VT);
+ // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
+ // if it is not already.
+ ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+ if (NewCond != Cond && (DCI.isBeforeLegalizeOps() ||
+ getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal))
+ return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+ }
+
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getValueType().isInteger()) {
+ if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+ N0.getOpcode() == ISD::XOR) {
+ // Simplify (X+Y) == (X+Z) --> Y == Z
+ if (N0.getOpcode() == N1.getOpcode()) {
+ if (N0.getOperand(0) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+ if (N0.getOperand(1) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+ if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+ // If X op Y == Y op X, try other combinations.
+ if (N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ Cond);
+ if (N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ Cond);
+ }
+ }
+
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ // Turn (X+C1) == C2 --> X == C2-C1
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue()-
+ LHSR->getAPIntValue(),
+ dl, N0.getValueType()), Cond);
+ }
+
+ // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+ if (N0.getOpcode() == ISD::XOR)
+ // If we know that all of the inverted bits are zero, don't bother
+ // performing the inversion.
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^
+ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+ }
+
+ // Turn (C1-X) == C2 --> X == C1-C2
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+ return
+ DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() -
+ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+ }
+ }
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
+ }
+
+ // Simplify (X+Z) == X --> Z == 0
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, dl, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, N0.getValueType()),
+ Cond);
+ if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ auto &DL = DAG.getDataLayout();
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(
+ ISD::SHL, dl, N1.getValueType(), N1,
+ DAG.getConstant(1, dl,
+ getShiftAmountTy(N1.getValueType(), DL)));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
+ }
+ }
+ }
+
+ if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+ N1.getOpcode() == ISD::XOR) {
+ // Simplify X == (X+Z) --> Z == 0
+ if (N1.getOperand(0) == N0)
+ return DAG.getSetCC(dl, VT, N1.getOperand(1),
+ DAG.getConstant(0, dl, N1.getValueType()), Cond);
+ if (N1.getOperand(1) == N0) {
+ if (DAG.isCommutativeBinOp(N1.getOpcode()))
+ return DAG.getSetCC(dl, VT, N1.getOperand(0),
+ DAG.getConstant(0, dl, N1.getValueType()), Cond);
+ if (N1.getNode()->hasOneUse()) {
+ assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+ auto &DL = DAG.getDataLayout();
+ // X == (Z-X) --> X<<1 == Z
+ SDValue SH = DAG.getNode(
+ ISD::SHL, dl, N1.getValueType(), N0,
+ DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL)));
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+ }
+ }
+ }
+
+ if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
+ return V;
+ }
+
+ // Fold away ALL boolean setcc's.
+ SDValue Temp;
+ if (N0.getValueType() == MVT::i1 && foldBooleans) {
+ switch (Cond) {
+ default: llvm_unreachable("Unknown integer setcc!");
+ case ISD::SETEQ: // X == Y -> ~(X^Y)
+ Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ N0 = DAG.getNOT(dl, Temp, MVT::i1);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETNE: // X != Y --> (X^Y)
+ N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+ break;
+ case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ Temp = DAG.getNOT(dl, N0, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(Temp.getNode());
+ break;
+ case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ Temp = DAG.getNOT(dl, N1, MVT::i1);
+ N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+ break;
+ }
+ if (VT != MVT::i1) {
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(N0.getNode());
+ // FIXME: If running after legalize, we probably can't do this.
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+ }
+ return N0;
+ }
+
+ // Could not fold it.
+ return SDValue();
+}
+
+/// Returns true (and the GlobalValue and the offset) if the node is a
+/// GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+ int64_t &Offset) const {
+ if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
+ GA = GASD->getGlobal();
+ Offset += GASD->getOffset();
+ return true;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ SDValue N1 = N->getOperand(0);
+ SDValue N2 = N->getOperand(1);
+ if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+ if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
+ Offset += V->getSExtValue();
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+SDValue TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // Default implementation: no optimization.
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(StringRef Constraint) const {
+ unsigned S = Constraint.size();
+
+ if (S == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'r': return C_RegisterClass;
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
+ return C_Memory;
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case '<':
+ case '>':
+ return C_Other;
+ }
+ }
+
+ if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
+ return C_Memory;
+ return C_Register;
+ }
+ return C_Unknown;
+}
+
+/// Try to replace an X constraint, which matches anything, with another that
+/// has more specific requirements based on the type of the corresponding
+/// operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+ if (ConstraintVT.isInteger())
+ return "r";
+ if (ConstraintVT.isFloatingPoint())
+ return "f"; // works for many targets
+ return nullptr;
+}
+
+/// Lower the specified operand into the Ops vector.
+/// If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+
+ if (Constraint.length() > 1) return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default: break;
+ case 'X': // Allows any operand; labels (basic block) use this.
+ if (Op.getOpcode() == ISD::BasicBlock) {
+ Ops.push_back(Op);
+ return;
+ }
+ // fall through
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 's': { // Relocatable Constant
+ // These operands are interested in values of the form (GV+C), where C may
+ // be folded in as an offset of GV, or it may be explicitly added. Also, it
+ // is possible and fine if either GV or C are missing.
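+ // Illustrative: an operand of the form (add (GlobalAddress @g), 16),
+ // where @g is some symbol, is folded below into a single
+ // TargetGlobalAddress of @g with offset 16, i.e. the relocatable
+ // expression g+16.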
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+ // If we have "(add GV, C)", pull out GV/C
+ if (Op.getOpcode() == ISD::ADD) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+ if (!C || !GA) {
+ C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+ }
+ if (!C || !GA) {
+ C = nullptr;
+ GA = nullptr;
+ }
+ }
+
+ // If we find a valid operand, map to the TargetXXX version so that the
+ // value itself doesn't get selected.
+ if (GA) { // Either &GV or &GV+C
+ if (ConstraintLetter != 'n') {
+ int64_t Offs = GA->getOffset();
+ if (C) Offs += C->getZExtValue();
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C ? SDLoc(C) : SDLoc(),
+ Op.getValueType(), Offs));
+ }
+ return;
+ }
+ if (C) { // just C, no GV.
+ // Simple constants are not allowed for 's'.
+ if (ConstraintLetter != 's') {
+ // gcc prints these as sign extended. Sign extend value to 64 bits
+ // now; without this it would get ZExt'd later in
+ // ScheduleDAGSDNodes::EmitNode, which is very generic.
+ Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+ SDLoc(C), MVT::i64));
+ }
+ return;
+ }
+ break;
+ }
+ }
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
+ StringRef Constraint,
+ MVT VT) const {
+ if (Constraint.empty() || Constraint[0] != '{')
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
+ assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+ // Remove the braces from around the name.
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
+
+ // Figure out which register class contains this reg.
+ for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+ E = RI->regclass_end(); RCI != E; ++RCI) {
+ const TargetRegisterClass *RC = *RCI;
+
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ if (!isLegalRC(RC))
+ continue;
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I) {
+ if (RegName.equals_lower(RI->getRegAsmName(*I))) {
+ std::pair<unsigned, const TargetRegisterClass*> S =
+ std::make_pair(*I, RC);
+
+ // If this register class has the requested value type, return it,
+ // otherwise keep searching and return the first class found
+ // if no other is found which explicitly has the requested type.
+ if (RC->hasType(VT))
+ return S;
+ else if (!R.second)
+ R = S;
+ }
+ }
+ }
+
+ return R;
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// Return true if this is an input operand that is a matching constraint like
+/// "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
+}
+
+/// If this is an input matching constraint, this method returns the output
+/// operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+ assert(!ConstraintCode.empty() && "No known constraint!");
+ return atoi(ConstraintCode.c_str());
+}
+
+/// Split up the constraint string from the inline assembly value into the
+/// specific constraints and their prefixes, and also tie in the associated
+/// operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector
+TargetLowering::ParseConstraints(const DataLayout &DL,
+ const TargetRegisterInfo *TRI,
+ ImmutableCallSite CS) const {
+ /// Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ ConstraintOperands.emplace_back(std::move(CI));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT =
+ getSimpleValueType(DL, STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+
+ // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+ if (StructType *STy = dyn_cast<StructType>(OpTy))
+ if (STy->getNumElements() == 1)
+ OpTy = STy->getElementType(0);
+
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = DL.getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+ unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
+ OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
+ } else {
+ OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (!ConstraintOperands.empty()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag this alternative as maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
+ Input.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+/// Return an integer indicating how general CT is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+ switch (CT) {
+ case TargetLowering::C_Other:
+ case TargetLowering::C_Unknown:
+ return 0;
+ case TargetLowering::C_Register:
+ return 1;
+ case TargetLowering::C_RegisterClass:
+ return 2;
+ case TargetLowering::C_Memory:
+ return 3;
+ }
+ llvm_unreachable("Invalid constraint type");
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit integral immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float, if in host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
+/// If there are multiple different constraints that we could pick for this
+/// operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+/// Other -> immediates and magic values
+/// Register -> one specific register
+/// RegisterClass -> a group of regs
+/// Memory -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it. The problem is
+/// that if we have something that could either be in a register or in
+/// memory, using the register could cause selection of *other* operands to
+/// fail: they might only succeed if we pick memory. Because of this, the
+/// heuristic we use is:
+///
+/// 1) If there is an 'other' constraint, and if the operand is valid for
+/// that constraint, use it. This makes us take advantage of 'i'
+/// constraints when available.
+/// 2) Otherwise, pick the most general constraint present. This prefers
+/// 'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+ const TargetLowering &TLI,
+ SDValue Op, SelectionDAG *DAG) {
+ assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+ unsigned BestIdx = 0;
+ TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+ int BestGenerality = -1;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+ TargetLowering::ConstraintType CType =
+ TLI.getConstraintType(OpInfo.Codes[i]);
+
+ // If this is an 'other' constraint, see if the operand is valid for it.
+ // For example, on X86 we might have an 'rI' constraint. If the operand
+ // is an integer in the range [0..31] we want to use I (saving a load
+ // of a register), otherwise we must use 'r'.
+ if (CType == TargetLowering::C_Other && Op.getNode()) {
+ assert(OpInfo.Codes[i].size() == 1 &&
+ "Unhandled multi-letter 'other' constraint");
+ std::vector<SDValue> ResultOps;
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
+ ResultOps, *DAG);
+ if (!ResultOps.empty()) {
+ BestType = CType;
+ BestIdx = i;
+ break;
+ }
+ }
+
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
+ // This constraint letter is more general than the previous one, use it.
+ int Generality = getConstraintGenerality(CType);
+ if (Generality > BestGenerality) {
+ BestType = CType;
+ BestIdx = i;
+ BestGenerality = Generality;
+ }
+ }
+
+ OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+ OpInfo.ConstraintType = BestType;
+}
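+
+// Worked example (illustrative): for an "imr" constraint on a non-constant
+// integer operand, 'i' produces no lowered operands in step 1, so step 2
+// compares generalities from getConstraintGenerality(): 'i' (C_Other, 0),
+// 'r' (C_RegisterClass, 2), 'm' (C_Memory, 3), and 'm' wins. If the operand
+// were a small constant such as 4, 'i' would lower successfully in step 1
+// and be chosen outright.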
+
+/// Determines the constraint code and constraint type to use for the specific
+/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+ SDValue Op,
+ SelectionDAG *DAG) const {
+ assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+ // Single-letter constraints ('r') are very common.
+ if (OpInfo.Codes.size() == 1) {
+ OpInfo.ConstraintCode = OpInfo.Codes[0];
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ } else {
+ ChooseConstraint(OpInfo, *this, Op, DAG);
+ }
+
+ // 'X' matches anything.
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+ // Labels and constants are handled elsewhere ('X' is the only thing
+ // that matches labels). For Functions, the type here is the type of
+ // the result, which is not what we want to look at; leave them alone.
+ Value *v = OpInfo.CallOperandVal;
+ if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+ OpInfo.CallOperandVal = v;
+ return;
+ }
+
+ // Otherwise, try to resolve it to something we know about by looking at
+ // the actual operand type.
+ if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+ OpInfo.ConstraintCode = Repl;
+ OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+ }
+ }
+}
+
+/// \brief Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
+ const SDLoc &dl, SelectionDAG &DAG,
+ std::vector<SDNode *> &Created) {
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt =
+ DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(),
+ DAG.getDataLayout()));
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags);
+ Created.push_back(Op1.getNode());
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType());
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+ Created.push_back(Mul.getNode());
+ return Mul;
+}
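+
+// Worked example for BuildExactSDIV (illustrative): an exact "sdiv i32 %x, 6"
+// is first shifted right by countTrailingZeros(6) == 1 with an exact SRA,
+// leaving a division by 3, and then multiplied by the inverse of 3 modulo
+// 2^32, 0xAAAAAAAB, since 3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32).
+// The Newton step xn *= (2 - d*xn) doubles the number of correct low bits
+// per iteration, so the loop above terminates quickly for any odd d.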
+
+SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ std::vector<SDNode *> *Created) const {
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue();
+}
+
+/// \brief Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode *> *Created) const {
+ assert(Created && "No vector to hold sdiv ops.");
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // If the sdiv has an 'exact' bit we can use a simpler lowering.
+ if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact())
+ return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created);
+
+ APInt::ms magics = Divisor.magic();
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ SDValue Q;
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
+ Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+ DAG.getConstant(magics.m, dl, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+ N->getOperand(0),
+ DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhs or equivalent
+ // If d > 0 and m < 0, add the numerator
+ if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+ Created->push_back(Q.getNode());
+ }
+ // If d < 0 and m > 0, subtract the numerator.
+ if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+ Created->push_back(Q.getNode());
+ }
+ auto &DL = DAG.getDataLayout();
+ // Shift right algebraic if shift value is nonzero
+ if (magics.s > 0) {
+ Q = DAG.getNode(
+ ISD::SRA, dl, VT, Q,
+ DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
+ Created->push_back(Q.getNode());
+ }
+ // Extract the sign bit and add it to the quotient
+ SDValue T =
+ DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl,
+ getShiftAmountTy(Q.getValueType(), DL)));
+ Created->push_back(T.getNode());
+ return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
+
+/// \brief Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode *> *Created) const {
+ assert(Created && "No vector to hold udiv ops.");
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+ auto &DL = DAG.getDataLayout();
+
+ // Check to see if we can do this.
+ // FIXME: We should be more aggressive here.
+ if (!isTypeLegal(VT))
+ return SDValue();
+
+ // FIXME: We should use a narrower constant when the upper
+ // bits are known to be zero.
+ APInt::mu magics = Divisor.magicu();
+
+ SDValue Q = N->getOperand(0);
+
+ // If the divisor is even, we can avoid using the expensive fixup by shifting
+ // the divided value upfront.
+ if (magics.a != 0 && !Divisor[0]) {
+ unsigned Shift = Divisor.countTrailingZeros();
+ Q = DAG.getNode(
+ ISD::SRL, dl, VT, Q,
+ DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL)));
+ Created->push_back(Q.getNode());
+
+ // Get magic number for the shifted divisor.
+ magics = Divisor.lshr(Shift).magicu(Shift);
+ assert(magics.a == 0 && "Should use cheap fixup now");
+ }
+
+ // Multiply the numerator (operand 0) by the magic value
+ // FIXME: We should support doing a MUL in a wider type
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
+ Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT));
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
+ DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
+ else
+ return SDValue(); // No mulhu or equivalent
+
+ Created->push_back(Q.getNode());
+
+ if (magics.a == 0) {
+ assert(magics.s < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ return DAG.getNode(
+ ISD::SRL, dl, VT, Q,
+ DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL)));
+ } else {
+ SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(
+ ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL)));
+ Created->push_back(NPQ.getNode());
+ NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+ Created->push_back(NPQ.getNode());
+ return DAG.getNode(
+ ISD::SRL, dl, VT, NPQ,
+ DAG.getConstant(magics.s - 1, dl,
+ getShiftAmountTy(NPQ.getValueType(), DL)));
+ }
+}
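+
+// Worked example for BuildUDIV (illustrative; magic constants as tabulated in
+// Hacker's Delight): for "udiv i32 %n, 7" magicu() gives m = 0x24924925,
+// a = 1, s = 3, so the "expensive fixup" branch above is taken:
+//   q      = MULHU(%n, 0x24924925)
+//   npq    = ((%n - q) >>u 1) + q
+//   result = npq >>u (s - 1)          // i.e. >>u 2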
+
+bool TargetLowering::
+verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
+ if (!isa<ConstantSDNode>(Op.getOperand(0))) {
+ DAG.getContext()->emitError("argument to '__builtin_return_address' must "
+ "be a constant integer");
+ return true;
+ }
+
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Legalization Utilities
+//===----------------------------------------------------------------------===//
+
+bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
+ SelectionDAG &DAG, SDValue LL, SDValue LH,
+ SDValue RL, SDValue RH) const {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
+ bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
+ bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
+ bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = HiLoVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ // LL, LH, RL, and RH must be either all NULL or all set to a value.
+ assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
+ (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
+
+ if (!LL.getNode() && !RL.getNode() &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0));
+ RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1));
+ }
+
+ if (!LL.getNode())
+ return false;
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
+ RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
+ return true;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
+ RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL);
+ return true;
+ }
+ }
+
+ if (!LH.getNode() && !RH.getNode() &&
+ isOperationLegalOrCustom(ISD::SRL, VT) &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ auto &DL = DAG.getDataLayout();
+ unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
+ SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL));
+ LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
+ LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
+ RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
+ RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
+ }
+
+ if (!LH.getNode())
+ return false;
+
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ return true;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ return true;
+ }
+ }
+ return false;
+}
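+
+// The decomposition behind expandMUL (illustrative): with k = bits(HiLoVT)
+// and L = LH*2^k + LL, R = RH*2^k + RL, the low 2*k bits of L*R are
+//   Lo = (LL*RL) mod 2^k
+//   Hi = (MULHU(LL, RL) + LL*RH + LH*RL) mod 2^k
+// which is the UMUL_LOHI/MULHU fallback sequence above; the earlier special
+// cases drop the cross terms when the high halves are known to be plain
+// zero or sign extensions of the low halves.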
+
+bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
+ SelectionDAG &DAG) const {
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+ SDLoc dl(SDValue(Node, 0));
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (VT != MVT::f32 || NVT != MVT::i64)
+ return false;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits());
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
+ SDValue Bias = DAG.getConstant(127, dl, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl,
+ IntVT);
+ SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+
+ auto &DL = DAG.getDataLayout();
+ SDValue ExponentBits = DAG.getNode(
+ ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(
+ ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, dl, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, NVT);
+
+ R = DAG.getSelectCC(
+ dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, getShiftAmountTy(IntVT, DL))),
+ DAG.getNode(ISD::SRL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, getShiftAmountTy(IntVT, DL))),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
+ Sign);
+
+ Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
+ DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
+ return true;
+}
+
+SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
+ SDLoc SL(LD);
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ unsigned NumElem = SrcVT.getVectorNumElements();
+
+ EVT SrcEltVT = SrcVT.getScalarType();
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+
+ unsigned Stride = SrcEltVT.getSizeInBits() / 8;
+ assert(SrcEltVT.isByteSized());
+
+ EVT PtrVT = BasePTR.getValueType();
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue ScalarLoad =
+ DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+
+ BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
+ DAG.getConstant(Stride, SL, PtrVT));
+
+ Vals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);
+
+ return DAG.getMergeValues({ Value, NewChain }, SL);
+}
+
+// FIXME: This relies on each element having a byte size, otherwise the stride
+// is 0 and just overwrites the same location. ExpandStore currently expects
+// this broken behavior.
+SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ SDLoc SL(ST);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ EVT PtrVT = BasePtr.getValueType();
+
+ // Store Stride in bytes
+ unsigned Stride = MemSclVT.getSizeInBits() / 8;
+ EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ // Extract each of the elements from the original vector and save them into
+ // memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
+ DAG.getConstant(Idx, SL, IdxVT));
+
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
+ DAG.getConstant(Idx * Stride, SL, PtrVT));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(
+ Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
+ MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ Stores.push_back(Store);
+ }
+
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
+}
+
+std::pair<SDValue, SDValue>
+TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT VT = LD->getValueType(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ if (VT.isFloatingPoint() || VT.isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
+ if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
+ if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) {
+ // Scalarize the load and let the individual components be handled.
+ SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
+ return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
+ }
+
+ // Expand to a (misaligned) integer load of the same size,
+ // then bitconvert to floating point or vector.
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+ LD->getMemOperand());
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
+ if (LoadedVT != VT)
+ Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
+ ISD::ANY_EXTEND, dl, VT, Result);
+
+ return std::make_pair(Result, newLoad.getValue(1));
+ }
+
+ // Copy the value to an (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ EVT PtrVT = Ptr.getValueType();
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
+ StackPtrIncrement);
+ }
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset), MemVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT);
+
+ // Callers expect a MERGE_VALUES node.
+ return std::make_pair(Load, TF);
+ }
+
+ assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
+ "Unaligned load of unsupported type.");
+
+ // Compute the new VT that is half the size of the old one. This is an
+ // integer MVT.
+ unsigned NumBits = LoadedVT.getSizeInBits();
+ EVT NewLoadedVT;
+ NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
+ NumBits >>= 1;
+
+ unsigned Alignment = LD->getAlignment();
+ unsigned IncrementSize = NumBits / 8;
+ ISD::LoadExtType HiExtType = LD->getExtensionType();
+
+ // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
+ if (HiExtType == ISD::NON_EXTLOAD)
+ HiExtType = ISD::ZEXTLOAD;
+
+ // Load the value in two parts
+ SDValue Lo, Hi;
+ if (DAG.getDataLayout().isLittleEndian()) {
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ } else {
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, MinAlign(Alignment, IncrementSize),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ }
+
+ // Aggregate the two parts.
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
+ DAG.getDataLayout()));
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
+ Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ return std::make_pair(Result, TF);
+}
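+
+// Integer-path sketch for expandUnalignedLoad (illustrative): an unaligned
+// "load i32, align 1" on a little-endian target becomes
+//   lo = zextload i16 from Ptr                 (original alignment)
+//   hi = zextload i16 from Ptr + 2             (ZEXTLOAD because the original
+//                                               load was NON_EXTLOAD)
+//   result = (hi << 16) | lo
+// with the two load chains joined by a TokenFactor.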
+
+SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
+ SelectionDAG &DAG) const {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Val = ST->getValue();
+ EVT VT = Val.getValueType();
+ int Alignment = ST->getAlignment();
+
+ SDLoc dl(ST);
+ if (ST->getMemoryVT().isFloatingPoint() ||
+ ST->getMemoryVT().isVector()) {
+ EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (isTypeLegal(intVT)) {
+ if (!isOperationLegalOrCustom(ISD::STORE, intVT)) {
+ // Scalarize the store and let the individual components be handled.
+ SDValue Result = scalarizeVectorStore(ST, DAG);
+
+ return Result;
+ }
+ // Expand to a bitconvert of the value to the integer type of the
+ // same size, then a (misaligned) int store.
+ // FIXME: Does not handle truncating floating point stores!
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ Alignment, ST->getMemOperand()->getFlags());
+ return Result;
+ }
+ // Do an (aligned) store to a stack slot, then copy from the stack slot
+ // to the final destination using (unaligned) integer loads and stores.
+ EVT StoredVT = ST->getMemoryVT();
+ MVT RegVT =
+ getRegisterType(*DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(),
+ StoredVT.getSizeInBits()));
+ EVT PtrVT = Ptr.getValueType();
+ unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+
+ // Perform the original store, only redirected to the stack slot.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
+ MachinePointerInfo(), StoredVT);
+
+ EVT StackPtrVT = StackPtr.getValueType();
+
+ SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
+ SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
+ SmallVector<SDValue, 8> Stores;
+ unsigned Offset = 0;
+
+ // Do all but one of the copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the stack slot.
+ SDValue Load =
+ DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
+ // Store it to the final location. Remember the store.
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags()));
+ // Increment the pointers.
+ Offset += RegBytes;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
+ StackPtr, StackPtrIncrement);
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+ }
+
+ // The last store may be partial. Do a truncating store. On big-endian
+ // machines this requires an extending load from the stack slot to ensure
+ // that the bits are in the right place.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (StoredBytes - Offset));
+
+ // Load from the stack slot.
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(), MemVT);
+
+ Stores.push_back(
+ DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+ ST->getPointerInfo().getWithOffset(Offset), MemVT,
+ MinAlign(ST->getAlignment(), Offset),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo()));
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ return Result;
+ }
+
+ assert(ST->getMemoryVT().isInteger() &&
+ !ST->getMemoryVT().isVector() &&
+ "Unaligned store of unknown type.");
+ // Get the half-size VT
+ EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ int NumBits = NewStoredVT.getSizeInBits();
+ int IncrementSize = NumBits / 8;
+
+ // Divide the stored value in two parts.
+ SDValue ShiftAmount =
+ DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
+ DAG.getDataLayout()));
+ SDValue Lo = Val;
+ SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
+
+ // Store the two parts
+ SDValue Store1, Store2;
+ Store1 = DAG.getTruncStore(Chain, dl,
+ DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
+ Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags());
+
+ EVT PtrVT = Ptr.getValueType();
+ Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
+ DAG.getConstant(IncrementSize, dl, PtrVT));
+ Alignment = MinAlign(Alignment, IncrementSize);
+ Store2 = DAG.getTruncStore(
+ Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ return Result;
+}
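+
+// Integer-path sketch for expandUnalignedStore (illustrative): an unaligned
+// "store i32 %v, align 1" on a little-endian target becomes
+//   truncstore i16 %v          to Ptr
+//   truncstore i16 (%v >> 16)  to Ptr + 2
+// joined by a TokenFactor; on big-endian targets the two halves are swapped.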
+
+//===----------------------------------------------------------------------===//
+// Implementation of Emulated TLS Model
+//===----------------------------------------------------------------------===//
+
+SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ // Access to address of TLS variable xyz is lowered to a function call:
+ // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
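+ // For example (illustrative IR shape), an access to a thread_local global
+ // @x is lowered roughly to
+ //   %addr = call i8* @__emutls_get_address(i8* bitcast (... @__emutls_v.x))
+ // and the caller then uses the returned pointer as the address of x.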
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+ SDLoc dl(GA);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
+ Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+ StringRef EmuTlsVarName(NameString);
+ GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+ assert(EmuTlsVar && "Cannot find EmuTlsVar ");
+ Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+ Entry.Ty = VoidPtrType;
+ Args.push_back(Entry);
+
+ SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+ CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+ // calls. At least for X86 targets; maybe good for other targets too?
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true); // Is this only for X86 target?
+ MFI->setHasCalls(true);
+
+ assert((GA->getOffset() == 0) &&
+ "Emulated TLS must have zero offset in GlobalAddressSDNode");
+ return CallResult.first;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
new file mode 100644
index 000000000000..1efc440cd701
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -0,0 +1,468 @@
+//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom lowering code required by the shadow-stack GC
+// strategy.
+//
+// This pass implements the code transformation described in this paper:
+// "Accurate Garbage Collection in an Uncooperative Environment"
+// Fergus Henderson, ISMM, 2002
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "shadowstackgclowering"
+
+namespace {
+
+class ShadowStackGCLowering : public FunctionPass {
+ /// RootChain - This is the global linked-list that contains the chain of GC
+ /// roots.
+ GlobalVariable *Head;
+
+ /// StackEntryTy - Abstract type of a link in the shadow stack.
+ ///
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
+
+ /// Roots - GC roots in the current function. Each is a pair of the
+ /// intrinsic call and its corresponding alloca.
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots;
+
+public:
+ static char ID;
+ ShadowStackGCLowering();
+
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool IsNullValue(Value *V);
+ Constant *GetFrameMap(Function &F);
+ Type *GetConcreteStackEntryType(Function &F);
+ void CollectRoots(Function &F);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Type *Ty, Value *BasePtr, int Idx1,
+ const char *Name);
+ static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
+ Type *Ty, Value *BasePtr, int Idx1, int Idx2,
+ const char *Name);
+};
+}
+
+INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+ "Shadow Stack GC Lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering",
+ "Shadow Stack GC Lowering", false, false)
+
+FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); }
+
+char ShadowStackGCLowering::ID = 0;
+
+ShadowStackGCLowering::ShadowStackGCLowering()
+ : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr),
+ FrameMapTy(nullptr) {
+ initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
+}
+
+namespace {
+/// EscapeEnumerator - This is a little algorithm to find all escape points
+/// from a function so that "finally"-style code can be inserted. In addition
+/// to finding the existing return and unwind instructions, it also (if
+/// necessary) transforms any call instructions into invokes and sends them to
+/// a landing pad.
+///
+/// It's wrapped up in a state machine using the same transform C# uses for
+/// 'yield return' enumerators. This transform allows it to be non-allocating.
+class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ // State.
+ int State;
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+
+public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup")
+ : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+ IRBuilder<> *Next() {
+ switch (State) {
+ default:
+ return nullptr;
+
+ case 0:
+ StateBB = F.begin();
+ StateE = F.end();
+ State = 1;
+
+ case 1:
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = &*StateBB++;
+
+ // Branches and invokes do not escape; only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI);
+ return &Builder;
+ }
+
+ State = 2;
+
+ // Find all 'call' instructions.
+ SmallVector<Instruction *, 16> Calls;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE;
+ ++II)
+ if (CallInst *CI = dyn_cast<CallInst>(II))
+ if (!CI->getCalledFunction() ||
+ !CI->getCalledFunction()->getIntrinsicID())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return nullptr;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy =
+ StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
+ if (!F.hasPersonalityFn()) {
+ Constant *PersFn = F.getParent()->getOrInsertFunction(
+ "__gcc_personality_v0",
+ FunctionType::get(Type::getInt32Ty(C), true));
+ F.setPersonalityFn(PersFn);
+ }
+ LandingPadInst *LPad =
+ LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value *, 16> Args;
+ for (unsigned I = Calls.size(); I != 0;) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+
+ // Split the basic block containing the function call.
+ BasicBlock *CallBB = CI->getParent();
+ BasicBlock *NewBB = CallBB->splitBasicBlock(
+ CI->getIterator(), CallBB->getName() + ".cont");
+
+ // Remove the unconditional branch inserted at the end of CallBB.
+ CallBB->getInstList().pop_back();
+ NewBB->getInstList().remove(CI);
+
+ // Create a new invoke instruction.
+ Args.clear();
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
+
+ InvokeInst *II =
+ InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args,
+ CI->getName(), CallBB);
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ delete CI;
+ }
+
+ Builder.SetInsertPoint(RI);
+ return &Builder;
+ }
+ }
+};
+}
+
+
+Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
+ // doInitialization creates the abstract type of this value.
+ Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
+
+ // Truncate the ShadowStackDescriptor if some metadata is null.
+ unsigned NumMeta = 0;
+ SmallVector<Constant *, 16> Metadata;
+ for (unsigned I = 0; I != Roots.size(); ++I) {
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
+ if (!C->isNullValue())
+ NumMeta = I + 1;
+ Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
+ }
+ Metadata.resize(NumMeta);
+
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
+ Constant *BaseElts[] = {
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
+ };
+
+ Constant *DescriptorElts[] = {
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)};
+
+ Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()};
+ StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta));
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
+
+ // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
+ // that, short of multithreaded LLVM, it should be safe; all that is
+ // necessary is that a simple Module::iterator loop not be invalidated.
+ // Appending to the GlobalVariable list is safe in that sense.
+ //
+ // All of the output passes emit globals last. The ExecutionEngine
+ // explicitly supports adding globals to the module after
+ // initialization.
+ //
+ // Still, if it isn't deemed acceptable, then this transformation needs
+ // to be a ModulePass (which means it cannot be in the 'llc' pipeline
+ // (which uses a FunctionPassManager (which segfaults (not asserts) if
+ // provided a ModulePass))).
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true,
+ GlobalVariable::InternalLinkage, FrameMap,
+ "__gc_" + F.getName());
+
+ Constant *GEPIndices[2] = {
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)};
+ return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices);
+}
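+
+// Shape of the emitted constant (illustrative): for a function F with two
+// gcroots and no metadata, GetFrameMap builds roughly
+//   @__gc_F = internal constant { %gc_map, [0 x i8*] } { { i32 2, i32 0 }, [] }
+// and returns the {0, 0} GEP, i.e. a pointer to the leading %gc_map field.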
+
+Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) {
+ // doInitialization creates the generic version of this type.
+ std::vector<Type *> EltTys;
+ EltTys.push_back(StackEntryTy);
+ for (size_t I = 0; I != Roots.size(); I++)
+ EltTys.push_back(Roots[I].second->getAllocatedType());
+
+ return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str());
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now. If
+/// not, exit fast.
+bool ShadowStackGCLowering::doInitialization(Module &M) {
+ bool Active = false;
+ for (Function &F : M) {
+ if (F.hasGC() && F.getGC() == std::string("shadow-stack")) {
+ Active = true;
+ break;
+ }
+ }
+ if (!Active)
+ return false;
+
+ // struct FrameMap {
+ // int32_t NumRoots; // Number of roots in stack frame.
+ // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
+ // void *Meta[]; // May be absent for roots without metadata.
+ // };
+ std::vector<Type *> EltTys;
+ // 32 bits is ok up to a 32GB stack frame. :)
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ // Specifies length of variable length array.
+ EltTys.push_back(Type::getInt32Ty(M.getContext()));
+ FrameMapTy = StructType::create(EltTys, "gc_map");
+ PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
+
+ // struct StackEntry {
+ // ShadowStackEntry *Next; // Caller's stack entry.
+ // FrameMap *Map; // Pointer to constant FrameMap.
+ // void *Roots[]; // Stack roots (in-place array, so we pretend).
+ // };
+
+ StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+
+ EltTys.clear();
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+ EltTys.push_back(FrameMapPtrTy);
+ StackEntryTy->setBody(EltTys);
+ PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+
+ // Get the root chain if it already exists.
+ Head = M.getGlobalVariable("llvm_gc_root_chain");
+ if (!Head) {
+ // If the root chain does not exist, insert a new one with linkonce
+ // linkage!
+ Head = new GlobalVariable(
+ M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage,
+ Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain");
+ } else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
+ Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
+ Head->setLinkage(GlobalValue::LinkOnceAnyLinkage);
+ }
+
+ return true;
+}
+
+bool ShadowStackGCLowering::IsNullValue(Value *V) {
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->isNullValue();
+ return false;
+}
+
+void ShadowStackGCLowering::CollectRoots(Function &F) {
+ // FIXME: Account for original alignment. Could fragment the root array.
+ // Approach 1: Null initialize empty slots at runtime. Yuck.
+ // Approach 2: Emit a map of the array instead of just a count.
+
+ assert(Roots.empty() && "Not cleaned up?");
+
+ SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
+ if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+ if (Function *F = CI->getCalledFunction())
+ if (F->getIntrinsicID() == Intrinsic::gcroot) {
+ std::pair<CallInst *, AllocaInst *> Pair = std::make_pair(
+ CI,
+ cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
+ Roots.push_back(Pair);
+ else
+ MetaRoots.push_back(Pair);
+ }
+
+ // Number roots with metadata (usually empty) at the beginning, so that the
+ // FrameMap::Meta array can be elided.
+ Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Type *Ty,
+ Value *BasePtr, int Idx,
+ int Idx2,
+ const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
+ IRBuilder<> &B, Type *Ty, Value *BasePtr,
+ int Idx, const char *Name) {
+ Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
+ ConstantInt::get(Type::getInt32Ty(Context), Idx)};
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
+
+ assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
+
+ return dyn_cast<GetElementPtrInst>(Val);
+}
+
+/// runOnFunction - Insert code to maintain the shadow stack.
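+/// The generated prologue/epilogue has roughly this shape (illustrative):
+///   %gc_frame = alloca %gc_stackentry.F      ; { { next, map }, root slots }
+///   %gc_frame.map  <- the constant FrameMap
+///   %gc_frame.next <- current llvm_gc_root_chain
+///   llvm_gc_root_chain <- %gc_frame          ; push on entry
+///   ... gcroot allocas are replaced by slots in %gc_frame ...
+///   llvm_gc_root_chain <- %gc_frame.next     ; pop at every escape point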
+bool ShadowStackGCLowering::runOnFunction(Function &F) {
+ // Quick exit for functions that do not use the shadow stack GC.
+ if (!F.hasGC() ||
+ F.getGC() != std::string("shadow-stack"))
+ return false;
+
+ LLVMContext &Context = F.getContext();
+
+ // Find calls to llvm.gcroot.
+ CollectRoots(F);
+
+ // If there are no roots in this function, then there is no need to add a
+ // stack map entry for it.
+ if (Roots.empty())
+ return false;
+
+ // Build the constant map and figure the type of the shadow stack entry.
+ Value *FrameMap = GetFrameMap(F);
+ Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
+
+ // Build the shadow stack entry at the very start of the function.
+ BasicBlock::iterator IP = F.getEntryBlock().begin();
+ IRBuilder<> AtEntry(IP->getParent(), IP);
+
+ Instruction *StackEntry =
+ AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");
+
+ while (isa<AllocaInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Initialize the map pointer and load the current head of the shadow stack.
+ Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 1, "gc_frame.map");
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
+
+ // After all the allocas...
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ // For each root, find the corresponding slot in the aggregate...
+ Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 1 + I, "gc_root");
+
+ // And use it in lieu of the alloca.
+ AllocaInst *OriginalAlloca = Roots[I].second;
+ SlotPtr->takeName(OriginalAlloca);
+ OriginalAlloca->replaceAllUsesWith(SlotPtr);
+ }
+
+ // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
+ // really necessary (the collector would never see the intermediate state at
+ // runtime), but it's nicer not to push the half-initialized entry onto the
+ // shadow stack.
+ while (isa<StoreInst>(IP))
+ ++IP;
+ AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+ // Push the entry onto the shadow stack.
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, "gc_newhead");
+ AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+ AtEntry.CreateStore(NewHeadVal, Head);
+
+ // For each instruction that escapes...
+ EscapeEnumerator EE(F, "gc_cleanup");
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+ // AtEntry, since that would make the value live for the entire function.
+ Instruction *EntryNextPtr2 =
+ CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
+ "gc_frame.next");
+ Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ AtExit->CreateStore(SavedHead, Head);
+ }
+
+ // Delete the original allocas (which are no longer used) and the intrinsic
+ // calls (which are no longer valid). Doing this last avoids invalidating
+ // iterators.
+ for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+ Roots[I].first->eraseFromParent();
+ Roots[I].second->eraseFromParent();
+ }
+
+ Roots.clear();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
new file mode 100644
index 000000000000..d361a6c4ad06
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -0,0 +1,556 @@
+//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for safe points where the prologue and epilogue can be
+// inserted.
+// The safe point for the prologue (resp. epilogue) is called Save
+// (resp. Restore).
+// A point is safe for the prologue (resp. epilogue) if and only if
+// 1) it dominates (resp. post-dominates) all the frame-related operations and
+// 2) between two executions of the Save (resp. Restore) point there is an
+// execution of the Restore (resp. Save) point.
+//
+// For instance, the following points are safe:
+// for (int i = 0; i < 10; ++i) {
+// Save
+// ...
+// Restore
+// }
+// Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
+// And the following points are not:
+// for (int i = 0; i < 10; ++i) {
+// Save
+// ...
+// }
+// for (int i = 0; i < 10; ++i) {
+// ...
+// Restore
+// }
+// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
+//
+// This pass also ensures that the safe points are 3) cheaper than the regular
+// entry and exit blocks.
+//
+// Property #1 is ensured via the use of MachineDominatorTree and
+// MachinePostDominatorTree.
+// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
+// points must be in the same loop.
+// Property #3 is ensured via the MachineBlockFrequencyInfo.
+//
+// If this pass found points matching all these properties, then
+// MachineFrameInfo is updated with this information.
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+// To check for profitability.
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+// For property #1 for Save.
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+// To record the result of the analysis.
+#include "llvm/CodeGen/MachineFrameInfo.h"
+// For property #2.
+#include "llvm/CodeGen/MachineLoopInfo.h"
+// For property #1 for Restore.
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+// To know about callee-saved.
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+// To query the target about frame lowering.
+#include "llvm/Target/TargetFrameLowering.h"
+// To know about frame setup operation.
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+// To access TargetInstrInfo.
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "shrink-wrap"
+
+using namespace llvm;
+
+STATISTIC(NumFunc, "Number of functions");
+STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
+STATISTIC(NumCandidatesDropped,
+ "Number of shrink-wrapping candidates dropped because of frequency");
+
+static cl::opt<cl::boolOrDefault>
+ EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
+ cl::desc("enable the shrink-wrapping pass"));
+
+namespace {
+/// \brief Class to determine where the safe points to insert the
+/// prologue and epilogue are.
+/// Unlike the paper from Fred C. Chow, PLDI'88, which introduced the
+/// shrink-wrapping term for prologue/epilogue placement, this pass
+/// does not rely on expensive data-flow analysis. Instead we use
+/// dominance properties and loop information to decide which points
+/// are safe for such insertion.
+class ShrinkWrap : public MachineFunctionPass {
+ /// Hold callee-saved information.
+ RegisterClassInfo RCI;
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ /// Current safe point found for the prologue.
+ /// The prologue will be inserted before the first instruction
+ /// in this basic block.
+ MachineBasicBlock *Save;
+ /// Current safe point found for the epilogue.
+ /// The epilogue will be inserted before the first terminator instruction
+ /// in this basic block.
+ MachineBasicBlock *Restore;
+ /// Hold the information of the basic block frequency.
+ /// Used to check the profitability of the new points.
+ MachineBlockFrequencyInfo *MBFI;
+ /// Hold the loop information. Used to determine if Save and Restore
+ /// are in the same loop.
+ MachineLoopInfo *MLI;
+ /// Frequency of the Entry block.
+ uint64_t EntryFreq;
+ /// Current opcode for frame setup.
+ unsigned FrameSetupOpcode;
+ /// Current opcode for frame destroy.
+ unsigned FrameDestroyOpcode;
+ /// Entry block.
+ const MachineBasicBlock *Entry;
+ typedef SmallSetVector<unsigned, 16> SetOfRegs;
+ /// Registers that need to be saved for the current function.
+ mutable SetOfRegs CurrentCSRs;
+ /// Current MachineFunction.
+ MachineFunction *MachineFunc;
+
+ /// \brief Check if \p MI uses or defines a callee-saved register or
+ /// a frame index. If this is the case, this means \p MI must happen
+ /// after Save and before Restore.
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
+
+ const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
+ if (CurrentCSRs.empty()) {
+ BitVector SavedRegs;
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
+
+ TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS);
+
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ CurrentCSRs.insert((unsigned)Reg);
+ }
+ return CurrentCSRs;
+ }
+
+ /// \brief Update the Save and Restore points such that \p MBB is in
+ /// the region that is dominated by Save and post-dominated by Restore
+ /// and Save and Restore still match the safe point definition.
+ /// Such points may not exist, and Save and/or Restore may be null after
+ /// this call.
+ void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
+
+ /// \brief Initialize the pass for \p MF.
+ void init(MachineFunction &MF) {
+ RCI.runOnMachineFunction(MF);
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ Save = nullptr;
+ Restore = nullptr;
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ EntryFreq = MBFI->getEntryFreq();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+ Entry = &MF.front();
+ CurrentCSRs.clear();
+ MachineFunc = &MF;
+
+ ++NumFunc;
+ }
+
+ /// Check whether or not Save and Restore points are still interesting for
+ /// shrink-wrapping.
+ bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
+
+ /// \brief Check if shrink wrapping is enabled for this target and function.
+ static bool isShrinkWrapEnabled(const MachineFunction &MF);
+
+public:
+ static char ID;
+
+ ShrinkWrap() : MachineFunctionPass(ID) {
+ initializeShrinkWrapPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const override {
+ return "Shrink Wrapping analysis";
+ }
+
+ /// \brief Perform the shrink-wrapping analysis and update
+ /// the MachineFrameInfo attached to \p MF with the results.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char ShrinkWrap::ID = 0;
+char &llvm::ShrinkWrapID = ShrinkWrap::ID;
+
+INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false)
+
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
+ RegScavenger *RS) const {
+ if (MI.getOpcode() == FrameSetupOpcode ||
+ MI.getOpcode() == FrameDestroyOpcode) {
+ DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
+ return true;
+ }
+ for (const MachineOperand &MO : MI.operands()) {
+ bool UseOrDefCSR = false;
+ if (MO.isReg()) {
+ unsigned PhysReg = MO.getReg();
+ if (!PhysReg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Unallocated register?!");
+ UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg);
+ } else if (MO.isRegMask()) {
+ // Check if this regmask clobbers any of the CSRs.
+ for (unsigned Reg : getCurrentCSRs(RS)) {
+ if (MO.clobbersPhysReg(Reg)) {
+ UseOrDefCSR = true;
+ break;
+ }
+ }
+ }
+ if (UseOrDefCSR || MO.isFI()) {
+ DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
+ << MO.isFI() << "): " << MI << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
+/// \brief Helper function to find the immediate (post) dominator.
+template <typename ListOfBBs, typename DominanceAnalysis>
+MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
+ DominanceAnalysis &Dom) {
+ MachineBasicBlock *IDom = &Block;
+ for (MachineBasicBlock *BB : BBs) {
+ IDom = Dom.findNearestCommonDominator(IDom, BB);
+ if (!IDom)
+ break;
+ }
+ if (IDom == &Block)
+ return nullptr;
+ return IDom;
+}
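+// For instance (hypothetical CFG), in a diamond A -> {B, C} -> D,
+// FindIDom(D, D.predecessors(), MDT) folds the common dominators of B and C
+// down to A, the immediate dominator of D, and returns it.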
+
+void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
+ RegScavenger *RS) {
+ // Get rid of the easy cases first.
+ if (!Save)
+ Save = &MBB;
+ else
+ Save = MDT->findNearestCommonDominator(Save, &MBB);
+
+ if (!Save) {
+ DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
+ return;
+ }
+
+ if (!Restore)
+ Restore = &MBB;
+ else
+ Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
+
+ // Make sure we would be able to insert the restore code before the
+ // terminator.
+ if (Restore == &MBB) {
+ for (const MachineInstr &Terminator : MBB.terminators()) {
+ if (!useOrDefCSROrFI(Terminator, RS))
+ continue;
+ // One of the terminators needs to happen before the restore point.
+ if (MBB.succ_empty()) {
+ Restore = nullptr;
+ break;
+ }
+ // Look for a restore point that post-dominates all the successors.
+ // The immediate post-dominator is what we are looking for.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ break;
+ }
+ }
+
+ if (!Restore) {
+ DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n");
+ return;
+ }
+
+ // Make sure Save and Restore are suitable for shrink-wrapping:
+ // 1. all paths from Save need to lead to Restore before exiting.
+ // 2. all paths to Restore need to go through Save from Entry.
+ // We achieve that by making sure that:
+ // A. Save dominates Restore.
+ // B. Restore post-dominates Save.
+ // C. Save and Restore are in the same loop.
+ bool SaveDominatesRestore = false;
+ bool RestorePostDominatesSave = false;
+ while (Save && Restore &&
+ (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
+ !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
+ // Post-dominance is not enough in loops to ensure that all uses/defs
+ // are after the prologue and before the epilogue at runtime.
+ // E.g.,
+ // while(1) {
+ // Save
+ // Restore
+ // if (...)
+ // break;
+ // use/def CSRs
+ // }
+ // All the uses/defs of CSRs are dominated by Save and post-dominated
+ // by Restore. However, the CSR uses are still reachable after Restore
+ // executes and before Save executes again.
+ //
+ // For now, just push the restore/save points outside of loops.
+ // FIXME: Refine the criteria to still find interesting cases
+ // for loops.
+ MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ // Fix (A).
+ if (!SaveDominatesRestore) {
+ Save = MDT->findNearestCommonDominator(Save, Restore);
+ continue;
+ }
+ // Fix (B).
+ if (!RestorePostDominatesSave)
+ Restore = MPDT->findNearestCommonDominator(Restore, Save);
+
+ // Fix (C).
+ if (Save && Restore &&
+ (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
+ // Push Save outside of this loop if the immediate dominator is different
+ // from the save block; if it is not, bail out.
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
+ break;
+ } else {
+ // If the loop does not exit, there is no point in looking
+ // for a post-dominator outside the loop.
+ SmallVector<MachineBasicBlock*, 4> ExitBlocks;
+ MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks);
+ // Push Restore outside of this loop.
+ // Look for the immediate post-dominator of the loop exits.
+ MachineBasicBlock *IPdom = Restore;
+ for (MachineBasicBlock *LoopExitBB: ExitBlocks) {
+ IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT);
+ if (!IPdom)
+ break;
+ }
+ // If the immediate post-dominator is not in a less nested loop,
+ // then we are stuck in a program with an infinite loop.
+ // In that case, we will not find a safe point, hence, bail out.
+ if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore))
+ Restore = IPdom;
+ else {
+ Restore = nullptr;
+ break;
+ }
+ }
+ }
+ }
+}
+
+/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to MLI.
+/// I.e., check whether there exists a loop that contains SrcBB and where DestBB is the
+/// loop header.
+static bool isProperBackedge(const MachineLoopInfo &MLI,
+ const MachineBasicBlock *SrcBB,
+ const MachineBasicBlock *DestBB) {
+ for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop;
+ Loop = Loop->getParentLoop()) {
+ if (Loop->getHeader() == DestBB)
+ return true;
+ }
+ return false;
+}
+
+/// Check if the CFG of \p MF is irreducible.
+static bool isIrreducibleCFG(const MachineFunction &MF,
+ const MachineLoopInfo &MLI) {
+ const MachineBasicBlock *Entry = &*MF.begin();
+ ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry);
+ BitVector VisitedBB(MF.getNumBlockIDs());
+ for (const MachineBasicBlock *MBB : RPOT) {
+ VisitedBB.set(MBB->getNumber());
+ for (const MachineBasicBlock *SuccBB : MBB->successors()) {
+ if (!VisitedBB.test(SuccBB->getNumber()))
+ continue;
+ // We already visited SuccBB, thus MBB->SuccBB must be a backedge.
+ // Check that the header matches what we have in the loop information.
+ // Otherwise, we have an irreducible graph.
+ if (!isProperBackedge(MLI, MBB, SuccBB))
+ return true;
+ }
+ }
+ return false;
+}
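+// As a hypothetical example of what the check above rejects: if the entry
+// block branches to both B and C, and B and C branch to each other, the edge
+// from the later-visited block back to the earlier one is a backedge in the
+// RPO sense, but MachineLoopInfo has no natural loop with that block as
+// header, so isProperBackedge() fails and the CFG is reported as irreducible.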
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.empty() || !isShrinkWrapEnabled(MF))
+ return false;
+
+ DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ init(MF);
+
+ if (isIrreducibleCFG(MF, *MLI)) {
+ // If MF is irreducible, a block may be in a loop without
+ // MachineLoopInfo reporting it. I.e., we may use the
+ // post-dominance property in loops, which leads to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n");
+ return false;
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
+ for (MachineBasicBlock &MBB : MF) {
+ DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName()
+ << '\n');
+
+ if (MBB.isEHFuncletEntry()) {
+ DEBUG(dbgs() << "EH Funclets are not supported yet.\n");
+ return false;
+ }
+
+ for (const MachineInstr &MI : MBB) {
+ if (!useOrDefCSROrFI(MI, RS.get()))
+ continue;
+ // Save (resp. restore) point must dominate (resp. post dominate)
+ // MI. Look for the proper basic block for those.
+ updateSaveRestorePoints(MBB, RS.get());
+ // If we are at a point where we cannot improve the placement of
+ // save/restore instructions, just give up.
+ if (!ArePointsInteresting()) {
+ DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ return false;
+ }
+ // No need to look for other instructions, this basic block
+ // will already be part of the handled region.
+ break;
+ }
+ }
+ if (!ArePointsInteresting()) {
+ // If the points are not interesting at this point, then they must be null
+ // because it means we did not encounter any frame/CSR related code.
+ // Otherwise, we would have returned from the previous loop.
+ assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
+ DEBUG(dbgs() << "Nothing to shrink-wrap\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
+ << '\n');
+
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ do {
+ DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
+ << Save->getNumber() << ' ' << Save->getName() << ' '
+ << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: "
+ << Restore->getNumber() << ' ' << Restore->getName() << ' '
+ << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
+
+ bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
+ if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
+ TFI->canUseAsEpilogue(*Restore)))
+ break;
+ DEBUG(dbgs() << "New points are too expensive or invalid for the target\n");
+ MachineBasicBlock *NewBB;
+ if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
+ break;
+ NewBB = Save;
+ } else {
+ // Restore is expensive.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ if (!Restore)
+ break;
+ NewBB = Restore;
+ }
+ updateSaveRestorePoints(*NewBB, RS.get());
+ } while (Save && Restore);
+
+ if (!ArePointsInteresting()) {
+ ++NumCandidatesDropped;
+ return false;
+ }
+
+ DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber()
+ << ' ' << Save->getName() << "\nRestore: "
+ << Restore->getNumber() << ' ' << Restore->getName() << '\n');
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setSavePoint(Save);
+ MFI->setRestorePoint(Restore);
+ ++NumCandidates;
+ return false;
+}
+
+bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+ switch (EnableShrinkWrapOpt) {
+ case cl::BOU_UNSET:
+ return TFI->enableShrinkWrapping(MF) &&
+ // Windows with CFI has some limitations that make it impossible
+ // to use shrink-wrapping.
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+ // Sanitizers look at the value of the stack at the location
+ // of the crash. Since a crash can happen anywhere, the
+ // frame must be lowered before anything else happens for the
+ // sanitizers to be able to get a correct stack frame.
+ !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+ MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
+ MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory));
+ // If EnableShrinkWrap is set, it takes precedence over whatever the
+ // target sets. The rationale is that we assume we want to test
+ // something related to shrink-wrapping.
+ case cl::BOU_TRUE:
+ return true;
+ case cl::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Invalid shrink-wrapping state");
+}
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 000000000000..ce01c5f23e57
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,480 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "sjljehprepare"
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+class SjLjEHPrepare : public FunctionPass {
+ Type *doubleUnderDataTy;
+ Type *doubleUnderJBufTy;
+ Type *FunctionContextTy;
+ Constant *RegisterFn;
+ Constant *UnregisterFn;
+ Constant *BuiltinSetupDispatchFn;
+ Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
+ Constant *LSDAAddrFn;
+ Constant *CallSiteFn;
+ Constant *FuncCtxFn;
+ AllocaInst *FuncCtx;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit SjLjEHPrepare() : FunctionPass(ID) {}
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {}
+ const char *getPassName() const override {
+ return "SJLJ Exception Handling preparation";
+ }
+
+private:
+ bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
+ Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
+ void lowerIncomingArguments(Function &F);
+ void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes);
+ void insertCallSiteStore(Instruction *I, int Number);
+};
+} // end anonymous namespace
+
+char SjLjEHPrepare::ID = 0;
+INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions",
+ false, false)
+
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); }
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPrepare::doInitialization(Module &M) {
+ // Build the function context structure.
+ // builtin_setjmp uses a five word jbuf
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ doubleUnderDataTy = ArrayType::get(Int32Ty, 4);
+ doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5);
+ FunctionContextTy = StructType::get(VoidPtrTy, // __prev
+ Int32Ty, // call_site
+ doubleUnderDataTy, // __data
+ VoidPtrTy, // __personality
+ VoidPtrTy, // __lsda
+ doubleUnderJBufTy, // __jbuf
+ nullptr);
+
+ return true;
+}
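+
+// For reference, the FunctionContextTy built above corresponds roughly to the
+// following C struct (illustrative only; the field names are informal):
+//   struct SjLjFunctionContext {
+//     void *__prev;        // chain of registered contexts
+//     int   call_site;     // currently active call site number
+//     int   __data[4];     // exception value and selector land here
+//     void *__personality; // personality routine
+//     void *__lsda;        // language-specific data area
+//     void *__jbuf[5];     // builtin_setjmp buffer (fp, sp, ...)
+//   };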
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
+
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite =
+ Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site");
+
+ // Insert a store of the call-site number
+ ConstantInt *CallSiteNoC =
+ ConstantInt::get(Type::getInt32Ty(I->getContext()), Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/);
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
+ if (!LiveBBs.insert(BB).second)
+ return; // already been here.
+
+ for (BasicBlock *PredBB : predecessors(BB))
+ MarkBlocksLiveIn(PredBB, LiveBBs);
+}
+
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
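+/// For example (illustrative IR), a user such as
+///   %exn = extractvalue { i8*, i32 } %lpad, 0
+/// is rewritten to use \p ExnVal directly, and an extractvalue of index 1 is
+/// rewritten to use \p SelVal.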
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ auto *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI)
+ continue;
+ if (EVI->getNumIndices() != 1)
+ continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->use_empty())
+ EVI->eraseFromParent();
+ }
+
+ if (LPI->use_empty())
+ return;
+
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ auto *SelI = cast<Instruction>(SelVal);
+ IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator()));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+}
+
+/// setupFunctionContext - Allocate the function context on the stack and fill
+/// it with all of the data that we know at this point.
+Value *SjLjEHPrepare::setupFunctionContext(Function &F,
+ ArrayRef<LandingPadInst *> LPads) {
+ BasicBlock *EntryBB = &F.front();
+
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an alloca
+ // because the value needs to be added to the global context list.
+ auto &DL = F.getParent()->getDataLayout();
+ unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
+ FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
+ &EntryBB->front());
+
+ // Fill in the function context structure.
+ for (LandingPadInst *LPI : LPads) {
+ IRBuilder<> Builder(LPI->getParent(),
+ LPI->getParent()->getFirstInsertionPt());
+
+ // Reference the __data field.
+ Value *FCData =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
+
+ // The exception values come back in context->__data[0].
+ Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 0, "exception_gep");
+ Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
+
+ Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
+ 0, 1, "exn_selector_gep");
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+
+ substituteLPadValues(LPI, ExnVal, SelVal);
+ }
+
+ // Personality function
+ IRBuilder<> Builder(EntryBB->getTerminator());
+ Value *PersonalityFn = F.getPersonalityFn();
+ Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(
+ FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep");
+ Builder.CreateStore(
+ Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()),
+ PersonalityFieldPtr, /*isVolatile=*/true);
+
+ // LSDA address
+ Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr");
+ Value *LSDAFieldPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep");
+ Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true);
+
+ return FuncCtx;
+}
+
+/// lowerIncomingArguments - To avoid having to handle incoming arguments
+/// specially, we lower each arg to a copy instruction in the entry block. This
+/// ensures that the argument value itself cannot be live out of the entry
+/// block.
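+/// For example (illustrative IR), an i32 argument %x is copied with
+///   %x.tmp = select i1 true, i32 %x, i32 undef
+/// and every other use of %x is then rewired to %x.tmp.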
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
+ BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
+ while (isa<AllocaInst>(AfterAllocaInsPt) &&
+ cast<AllocaInst>(AfterAllocaInsPt)->isStaticAlloca())
+ ++AfterAllocaInsPt;
+ assert(AfterAllocaInsPt != F.front().end());
+
+ for (auto &AI : F.args()) {
+ Type *Ty = AI.getType();
+
+ // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction.
+ Value *TrueValue = ConstantInt::getTrue(F.getContext());
+ Value *UndefValue = UndefValue::get(Ty);
+ Instruction *SI = SelectInst::Create(
+ TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt);
+ AI.replaceAllUsesWith(SI);
+
+ // Reset the operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, &AI);
+ }
+}
+
+/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
+/// edge and spill them.
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst *> Invokes) {
+ // Finally, scan the code looking for instructions with bad live ranges.
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : BB) {
+ // Ignore obvious cases we don't have to handle. In particular, most
+ // instructions either have no uses or only have a single use inside the
+ // current block. Ignore them quickly.
+ if (Inst.use_empty())
+ continue;
+ if (Inst.hasOneUse() &&
+ cast<Instruction>(Inst.user_back())->getParent() == &BB &&
+ !isa<PHINode>(Inst.user_back()))
+ continue;
+
+ // If this is an alloca in the entry block, it's not a real register
+ // value.
+ if (auto *AI = dyn_cast<AllocaInst>(&Inst))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Avoid iterator invalidation by copying users to a temporary vector.
+ SmallVector<Instruction *, 16> Users;
+ for (User *U : Inst.users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (UI->getParent() != &BB || isa<PHINode>(UI))
+ Users.push_back(UI);
+ }
+
+ // Find all of the blocks that this value is live in.
+ SmallPtrSet<BasicBlock *, 32> LiveBBs;
+ LiveBBs.insert(&BB);
+ while (!Users.empty()) {
+ Instruction *U = Users.pop_back_val();
+
+ if (!isa<PHINode>(U)) {
+ MarkBlocksLiveIn(U->getParent(), LiveBBs);
+ } else {
+ // Uses for a PHI node occur in their predecessor block.
+ PHINode *PN = cast<PHINode>(U);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == &Inst)
+ MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+ }
+ }
+
+ // Now that we know all of the blocks that this thing is live in, see if
+ // it includes any of the unwind locations.
+ bool NeedsSpill = false;
+ for (InvokeInst *Invoke : Invokes) {
+ BasicBlock *UnwindBlock = Invoke->getUnwindDest();
+ if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around "
+ << UnwindBlock->getName() << "\n");
+ NeedsSpill = true;
+ break;
+ }
+ }
+
+ // If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
+ if (NeedsSpill) {
+ DemoteRegToStack(Inst, true);
+ ++NumSpilled;
+ }
+ }
+ }
+
+ // Go through the landing pads and remove any PHIs there.
+ for (InvokeInst *Invoke : Invokes) {
+ BasicBlock *UnwindBlock = Invoke->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode *, 8> PHIsToDemote;
+ for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty())
+ continue;
+
+ // Demote the PHIs to the stack.
+ for (PHINode *PN : PHIsToDemote)
+ DemotePHIToStack(PN);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(&UnwindBlock->front());
+ }
+}
+
+/// setupEntryBlockAndCallSites - Set up the entry block by creating and filling
+/// the function context and marking the call sites with the appropriate
+/// values. These values are used by the DWARF EH emitter.
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
+ SmallVector<ReturnInst *, 16> Returns;
+ SmallVector<InvokeInst *, 16> Invokes;
+ SmallSetVector<LandingPadInst *, 16> LPads;
+
+ // Look through the terminators of the basic blocks to find invokes.
+ for (BasicBlock &BB : F)
+ if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
+ if (Function *Callee = II->getCalledFunction())
+ if (Callee->getIntrinsicID() == Intrinsic::donothing) {
+ // Remove the NOP invoke.
+ BranchInst::Create(II->getNormalDest(), II);
+ II->eraseFromParent();
+ continue;
+ }
+
+ Invokes.push_back(II);
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
+ } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
+ Returns.push_back(RI);
+ }
+
+ if (Invokes.empty())
+ return false;
+
+ NumInvokes += Invokes.size();
+
+ lowerIncomingArguments(F);
+ lowerAcrossUnwindEdges(F, Invokes);
+
+ Value *FuncCtx =
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
+ BasicBlock *EntryBB = &F.front();
+ IRBuilder<> Builder(EntryBB->getTerminator());
+
+ // Get a reference to the jump buffer.
+ Value *JBufPtr =
+ Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep");
+
+ // Save the frame pointer.
+ Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0,
+ "jbuf_fp_gep");
+
+ Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
+ Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);
+
+ // Save the stack pointer.
+ Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2,
+ "jbuf_sp_gep");
+
+ Val = Builder.CreateCall(StackAddrFn, {}, "sp");
+ Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
+
+ // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
+ Builder.CreateCall(BuiltinSetupDispatchFn, {});
+
+ // Store a pointer to the function context so that the back-end will know
+ // where to look for it.
+ Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
+ Builder.CreateCall(FuncCtxFn, FuncCtxArg);
+
+ // At this point, we are all set up; update the invoke instructions to mark
+ // their call_site values.
+ for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
+ insertCallSiteStore(Invokes[I], I + 1);
+
+ ConstantInt *CallSiteNum =
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
+
+ // Record the call site value for the back end so it stays associated with
+ // the invoke.
+ CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
+ }
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block: before that point, no function context has been
+ // created for this function, so any unexpected exceptions thrown will go
+ // directly to the caller's context, which is what we want anyway; there is
+ // no need to do anything here.
+ for (BasicBlock &BB : F) {
+ if (&BB == &F.front())
+ continue;
+ for (Instruction &I : BB)
+ if (I.mayThrow())
+ insertCallSiteStore(&I, -1);
+ }
+
+ // Register the function context and make sure it's known not to throw.
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
+ Register->setDoesNotThrow();
+
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (BasicBlock &BB : F) {
+ if (&BB == &F.front())
+ continue;
+ for (Instruction &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(&I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(&I);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+ }
+
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (ReturnInst *Return : Returns)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Return);
+
+ return true;
+}
+
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ Module &M = *F.getParent();
+ RegisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), nullptr);
+ UnregisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), nullptr);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetupDispatchFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+
+ bool Res = setupEntryBlockAndCallSites(F);
+ return Res;
+}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 000000000000..dba103e9bfb1
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,245 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "slotindexes"
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum, "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+ mi2iMap.clear();
+ MBBRanges.clear();
+ idx2MBBMap.clear();
+ indexList.clear();
+ ileAllocator.Reset();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+ // Compute numbering as follows:
+ // Grab an iterator to the start of the index list.
+ // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+ // iterator in lock-step (though skipping it over indexes which have
+ // null pointers in the instruction field).
+ // At each iteration assert that the instruction pointed to in the index
+ // is the same one pointed to by the MI iterator.
+
+ // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+ // only need to be set up once after the first numbering is computed.
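+ //
+ // To illustrate the numbering (assuming the default spacing of
+ // SlotIndex::InstrDist, 16 at the time of writing), a block with two real
+ // instructions is numbered as:
+ //   0   block start entry (Slot_Block)
+ //   16  first instruction
+ //   32  second instruction
+ //   48  blank entry separating this block from the next one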
+
+ mf = &fn;
+
+ // Check that the index list is empty.
+ assert(indexList.empty() && "Index list non-empty at initial numbering?");
+ assert(idx2MBBMap.empty() &&
+ "Index -> MBB mapping non-empty at initial numbering?");
+ assert(MBBRanges.empty() &&
+ "MBB -> Index mapping non-empty at initial numbering?");
+ assert(mi2iMap.empty() &&
+ "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+ unsigned index = 0;
+ MBBRanges.resize(mf->getNumBlockIDs());
+ idx2MBBMap.reserve(mf->size());
+
+ indexList.push_back(createEntry(nullptr, index));
+
+ // Iterate over the function.
+ for (MachineBasicBlock &MBB : *mf) {
+ // Insert an index for the MBB start.
+ SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
+
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
+
+ // Insert a store index for the instr.
+ indexList.push_back(createEntry(&MI, index += SlotIndex::InstrDist));
+
+ // Save this base index in the maps.
+ mi2iMap.insert(std::make_pair(
+ &MI, SlotIndex(&indexList.back(), SlotIndex::Slot_Block)));
+ }
+
+ // We insert one blank instruction between basic blocks.
+ indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist));
+
+ MBBRanges[MBB.getNumber()].first = blockStartIndex;
+ MBBRanges[MBB.getNumber()].second = SlotIndex(&indexList.back(),
+ SlotIndex::Slot_Block);
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, &MBB));
+ }
+
+ // Sort the Idx2MBBMap
+ std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+ DEBUG(mf->print(dbgs(), this));
+
+ // And we're done!
+ return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+ // Renumber updates the index of every element of the index list.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+ ++NumGlobalRenum;
+
+ unsigned index = 0;
+
+ for (IndexList::iterator I = indexList.begin(), E = indexList.end();
+ I != E; ++I) {
+ I->setIndex(index);
+ index += SlotIndex::InstrDist;
+ }
+}
+
+// Renumber indexes locally after curItr was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
+ // Number indexes with half the default spacing so we can catch up quickly.
+ const unsigned Space = SlotIndex::InstrDist/2;
+ static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
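+ // E.g., assuming InstrDist == 16 (so Space == 8): if the entry before curItr
+ // has index 100, the newly inserted entries get 108, 116, ... until the next
+ // pre-existing index is larger and we have caught up.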
+
+ IndexList::iterator startItr = std::prev(curItr);
+ unsigned index = startItr->getIndex();
+ do {
+ curItr->setIndex(index += Space);
+ ++curItr;
+ // If the next index is bigger, we have caught up.
+ } while (curItr != indexList.end() && curItr->getIndex() <= index);
+
+ DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-'
+ << index << " ***\n");
+ ++NumLocalRenum;
+}
+
+// Repair indexes after adding and removing instructions.
+void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) {
+ // FIXME: Is this really necessary? The only caller repairIntervalsForRange()
+ // does the same thing.
+ // Find anchor points, which are at the beginning/end of blocks or at
+ // instructions that already have indexes.
+ while (Begin != MBB->begin() && !hasIndex(*Begin))
+ --Begin;
+ while (End != MBB->end() && !hasIndex(*End))
+ ++End;
+
+ bool includeStart = (Begin == MBB->begin());
+ SlotIndex startIdx;
+ if (includeStart)
+ startIdx = getMBBStartIdx(MBB);
+ else
+ startIdx = getInstructionIndex(*Begin);
+
+ SlotIndex endIdx;
+ if (End == MBB->end())
+ endIdx = getMBBEndIdx(MBB);
+ else
+ endIdx = getInstructionIndex(*End);
+
+ // FIXME: Conceptually, this code is implementing an iterator on MBB that
+ // optionally includes an additional position prior to MBB->begin(), indicated
+ // by the includeStart flag. This is done so that we can iterate MIs in a MBB
+ // in parallel with SlotIndexes, but there should be a better way to do this.
+ IndexList::iterator ListB = startIdx.listEntry()->getIterator();
+ IndexList::iterator ListI = endIdx.listEntry()->getIterator();
+ MachineBasicBlock::iterator MBBI = End;
+ bool pastStart = false;
+ while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
+ assert(ListI->getIndex() >= startIdx.getIndex() &&
+ (includeStart || !pastStart) &&
+ "Decremented past the beginning of region to repair.");
+
+ MachineInstr *SlotMI = ListI->getInstr();
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
+ bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
+
+ if (SlotMI == MI && !MBBIAtBegin) {
+ --ListI;
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ if (MBBI != Begin)
+ --MBBI;
+ else
+ pastStart = true;
+ } else {
+ --ListI;
+ if (SlotMI)
+ removeMachineInstrFromMaps(*SlotMI);
+ }
+ }
+
+ // In theory this could be combined with the previous loop, but it is tricky
+ // to update the IndexList while we are iterating it.
+ for (MachineBasicBlock::iterator I = End; I != Begin;) {
+ --I;
+ MachineInstr &MI = *I;
+ if (!MI.isDebugValue() && mi2iMap.find(&MI) == mi2iMap.end())
+ insertMachineInstrInMaps(MI);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SlotIndexes::dump() const {
+ for (IndexList::const_iterator itr = indexList.begin();
+ itr != indexList.end(); ++itr) {
+ dbgs() << itr->getIndex() << " ";
+
+ if (itr->getInstr()) {
+ dbgs() << *itr->getInstr();
+ } else {
+ dbgs() << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i)
+ dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';'
+ << MBBRanges[i].second << ")\n";
+}
+#endif
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+ if (isValid())
+ os << listEntry()->getIndex() << "Berd"[getSlot()];
+ else
+ os << "invalid";
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Dump a SlotIndex to stderr.
+LLVM_DUMP_METHOD void SlotIndex::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..f10c98ef4e50
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,375 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SpillPlacement.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "spillplacement"
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// BiasN - Sum of blocks that prefer a spill.
+ BlockFrequency BiasN;
+ /// BiasP - Sum of blocks that prefer a register.
+ BlockFrequency BiasP;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always one of the values {-1, 0, 1}. A positive number means the
+ /// variable should go in a register through this bundle.
+ int Value;
+
+ typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive block frequencies.
+ LinkVector Links;
+
+ /// SumLinkWeights - Cached sum of the weights of all links + Threshold.
+ BlockFrequency SumLinkWeights;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // We must spill if Bias < -sum(weights) or the MustSpill flag was set.
+ // BiasN is saturated when MustSpill is set; make sure this still returns
+ // true when the RHS saturates. Note that SumLinkWeights includes Threshold.
+ return BiasN >= BiasP + SumLinkWeights;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+ /// the CFG.
+ void clear(const BlockFrequency &Threshold) {
+ BiasN = BiasP = Value = 0;
+ SumLinkWeights = Threshold;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ void addLink(unsigned b, BlockFrequency w) {
+ // Update cached sum.
+ SumLinkWeights += w;
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node.
+ void addBias(BlockFrequency freq, BorderConstraint direction) {
+ switch (direction) {
+ default:
+ break;
+ case PrefReg:
+ BiasP += freq;
+ break;
+ case PrefSpill:
+ BiasN += freq;
+ break;
+ case MustSpill:
+ BiasN = BlockFrequency::getMaxFrequency();
+ break;
+ }
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[], const BlockFrequency &Threshold) {
+ // Compute the weighted sum of inputs.
+ BlockFrequency SumN = BiasN;
+ BlockFrequency SumP = BiasP;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) {
+ if (nodes[I->second].Value == -1)
+ SumN += I->first;
+ else if (nodes[I->second].Value == 1)
+ SumP += I->first;
+ }
+
+ // Each weighted sum is going to be less than the total frequency of the
+ // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we
+ // will add a dead zone around 0 for two reasons:
+ //
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ //
+ bool Before = preferReg();
+ if (SumN >= SumP + Threshold)
+ Value = -1;
+ else if (SumP >= SumN + Threshold)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+
+ void getDissentingNeighbors(SparseSet<unsigned> &List,
+ const Node nodes[]) const {
+ for (const auto &Elt : Links) {
+ unsigned n = Elt.second;
+ // Neighbors that already have the same value are not going to
+ // change because of this node changing.
+ if (Value != nodes[n].Value)
+ List.insert(n);
+ }
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+ TodoList.clear();
+ TodoList.setUniverse(bundles->getNumBundles());
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ BlockFrequencies.resize(mf.getNumBlockIDs());
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ setThreshold(MBFI->getEntryFreq());
+ for (auto &I : mf) {
+ unsigned Num = I.getNumber();
+ BlockFrequencies[Num] = MBFI->getBlockFreq(&I);
+ }
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = nullptr;
+ TodoList.clear();
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ TodoList.insert(n);
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear(Threshold);
+
+ // Very large bundles usually come from big switches, indirect branches,
+ // landing pads, or loops with many 'continue' statements. It is difficult to
+ // allocate registers when so many different blocks are involved.
+ //
+ // Give a small negative bias to large bundles such that a substantial
+ // fraction of the connected blocks need to be interested before we consider
+ // expanding the region through the bundle. This helps compile time by
+ // limiting the number of blocks visited and the number of links in the
+ // Hopfield network.
+ if (bundles->getBlocks(n).size() > 100) {
+ nodes[n].BiasP = 0;
+ nodes[n].BiasN = (MBFI->getEntryFreq() / 16);
+ }
+}
+
+/// \brief Set the threshold for a given entry frequency.
+///
+/// Set the threshold relative to \c Entry. Since the threshold is used as a
+/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
+/// threshold.
+void SpillPlacement::setThreshold(const BlockFrequency &Entry) {
+ // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
+ // it. Divide by 2^13, rounding as appropriate.
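+ // For example, when Entry == 2^14: Freq >> 13 == 2 and the rounding bit
+ // (Freq & (1 << 12)) is clear, so Threshold becomes 2, matching the
+ // reference value mentioned above.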
+ uint64_t Freq = Entry.getFrequency();
+ uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
+ Threshold = std::max(UINT64_C(1), Scaled);
+}
+
+/// addConstraints - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) {
+ for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ BlockFrequency Freq = BlockFrequencies[I->Number];
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq, I->Entry);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq, I->Exit);
+ }
+ }
+}
+
+/// addPrefSpill - Same as addConstraints(PrefSpill)
+void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) {
+ for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
+ I != E; ++I) {
+ BlockFrequency Freq = BlockFrequencies[*I];
+ if (Strong)
+ Freq += Freq;
+ unsigned ib = bundles->getBundle(*I, 0);
+ unsigned ob = bundles->getBundle(*I, 1);
+ activate(ib);
+ activate(ob);
+ nodes[ib].addBias(Freq, PrefSpill);
+ nodes[ob].addBias(Freq, PrefSpill);
+ }
+}
+
+void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
+ for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E;
+ ++I) {
+ unsigned Number = *I;
+ unsigned ib = bundles->getBundle(Number, 0);
+ unsigned ob = bundles->getBundle(Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ BlockFrequency Freq = BlockFrequencies[Number];
+ nodes[ib].addLink(ob, Freq);
+ nodes[ob].addLink(ib, Freq);
+ }
+}
+
+bool SpillPlacement::scanActiveBundles() {
+ RecentPositive.clear();
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
+ update(n);
+ // A node that must spill, or a node without any links is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (nodes[n].mustSpill())
+ continue;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+ return !RecentPositive.empty();
+}
+
+bool SpillPlacement::update(unsigned n) {
+ if (!nodes[n].update(nodes, Threshold))
+ return false;
+ nodes[n].getDissentingNeighbors(TodoList, nodes);
+ return true;
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+void SpillPlacement::iterate() {
+ // We do not need to push those nodes into the todolist.
+ // They have already been processed as part of the previous iteration.
+ RecentPositive.clear();
+
+ // Since the last iteration, the todolist has been augmented by calls
+ // to addConstraints, addLinks, and co.
+ // Update the network energy starting at this new frontier.
+ // The call to ::update will add the nodes that changed into the todolist.
+ unsigned Limit = bundles->getNumBundles() * 10;
+ while(Limit-- > 0 && !TodoList.empty()) {
+ unsigned n = TodoList.pop_back_val();
+ if (!update(n))
+ continue;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
+}
+
+void SpillPlacement::prepare(BitVector &RegBundles) {
+ RecentPositive.clear();
+ TodoList.clear();
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+}
+
+bool
+SpillPlacement::finish() {
+ assert(ActiveNodes && "Call prepare() first");
+
+ // Write preferences back to ActiveNodes.
+ bool Perfect = true;
+ for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n))
+ if (!nodes[n].preferReg()) {
+ ActiveNodes->reset(n);
+ Perfect = false;
+ }
+ ActiveNodes = nullptr;
+ return Perfect;
+}
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..9b9ecccf9049
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,170 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by prepare(), addConstraints(), and
+// finish() which are called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
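+// A typical use by a register allocator looks roughly like this (illustrative
+// sketch only; SP, LiveBlocks, and TransparentBlocks are placeholder names):
+//
+//   BitVector RegBundles;
+//   SP.prepare(RegBundles);
+//   SP.addConstraints(LiveBlocks);    // per-block entry/exit constraints
+//   SP.addLinks(TransparentBlocks);   // blocks that are live-through
+//   SP.scanActiveBundles();
+//   SP.iterate();
+//   bool Perfect = SP.finish();       // RegBundles now holds the answer
+//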
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/BlockFrequency.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+class MachineBlockFrequencyInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ const MachineBlockFrequencyInfo *MBFI;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the prepare()
+ // caller.
+ BitVector *ActiveNodes;
+
+ // Nodes with active links. Populated by scanActiveBundles.
+ SmallVector<unsigned, 8> Linked;
+
+ // Nodes that went positive during the last call to scanActiveBundles or
+ // iterate.
+ SmallVector<unsigned, 8> RecentPositive;
+
+ // Block frequencies are computed once. Indexed by block number.
+ SmallVector<BlockFrequency, 8> BlockFrequencies;
+
+ /// Decision threshold. A node gets the output value 0 if the weighted sum of
+ /// its inputs falls in the open interval (-Threshold;Threshold).
+ BlockFrequency Threshold;
+
+ /// List of nodes that need to be updated in ::iterate.
+ SparseSet<unsigned> TodoList;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {}
+ ~SpillPlacement() override { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ PrefBoth, ///< Block entry prefers both register and stack.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+
+ /// True when this block changes the value of the live range. This means
+ /// the block has a non-PHI def. When this is false, a live-in value on
+ /// the stack can be live-out on the stack without inserting a spill.
+ bool ChangesValue;
+ };
+
+ /// prepare - Reset state and prepare for a new spill placement computation.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled. This vector is retained.
+ void prepare(BitVector &RegBundles);
+
+ /// addConstraints - Add constraints and biases. This method may be called
+ /// more than once to accumulate constraints.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out.
+ void addConstraints(ArrayRef<BlockConstraint> LiveBlocks);
+
+ /// addPrefSpill - Add PrefSpill constraints to all blocks listed. This is
+ /// equivalent to calling addConstraint with identical BlockConstraints with
+ /// Entry = Exit = PrefSpill, and ChangesValue = false.
+ ///
+ /// @param Blocks Array of block numbers that prefer to spill in and out.
+ /// @param Strong When true, double the negative bias for these blocks.
+ void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong);
+
+ /// addLinks - Add transparent blocks with the given numbers.
+ void addLinks(ArrayRef<unsigned> Links);
+
+ /// scanActiveBundles - Perform an initial scan of all bundles activated by
+ /// addConstraints and addLinks, updating their state. Add all the bundles
+ /// that now prefer a register to RecentPositive.
+ /// Prepare internal data structures for iterate.
+ /// Return true if there are any positive nodes.
+ bool scanActiveBundles();
+
+ /// iterate - Update the network iteratively until convergence, or new bundles
+ /// are found.
+ void iterate();
+
+ /// getRecentPositive - Return an array of bundles that became positive during
+ /// the previous call to scanActiveBundles or iterate.
+ ArrayRef<unsigned> getRecentPositive() { return RecentPositive; }
+
+ /// finish - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// The selected bundles are returned in the bitvector passed to prepare().
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool finish();
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ BlockFrequency getBlockFrequency(unsigned Number) const {
+ return BlockFrequencies[Number];
+ }
+
+private:
+ bool runOnMachineFunction(MachineFunction&) override;
+ void getAnalysisUsage(AnalysisUsage&) const override;
+ void releaseMemory() override;
+
+ void activate(unsigned);
+ void setThreshold(const BlockFrequency &Entry);
+
+ bool update(unsigned);
+};
+
+} // end namespace llvm
+
+#endif
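
The intended calling sequence for the solver above is prepare(), one or more addConstraints()/addPrefSpill()/addLinks() calls, scanActiveBundles(), iterate() until no nodes remain positive, and finally finish(). A minimal driver sketch follows; it is not part of the imported file, and the names SpillPlacer, Bundles, SplitConstraints and TransparentBlocks stand in for objects the calling register allocator already owns (a real caller would typically also grow its candidate region from getRecentPositive() before iterating again).

static bool placeSpills(SpillPlacement &SpillPlacer, const EdgeBundles &Bundles,
                        ArrayRef<SpillPlacement::BlockConstraint> SplitConstraints,
                        ArrayRef<unsigned> TransparentBlocks,
                        BitVector &RegBundles) {
  RegBundles.resize(Bundles.getNumBundles());
  SpillPlacer.prepare(RegBundles);              // RegBundles receives the result.
  SpillPlacer.addConstraints(SplitConstraints); // Biases from blocks with uses.
  SpillPlacer.addLinks(TransparentBlocks);      // Live-through blocks.
  SpillPlacer.scanActiveBundles();
  while (!SpillPlacer.getRecentPositive().empty())
    SpillPlacer.iterate();                      // Update the network until it settles.
  return SpillPlacer.finish();                  // True if a perfect (all-register) solution was found.
}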
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
new file mode 100644
index 000000000000..61ee508c8394
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -0,0 +1,42 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SPILLER_H
+#define LLVM_LIB_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+ class LiveRangeEdit;
+ class MachineFunction;
+ class MachineFunctionPass;
+ class VirtRegMap;
+ class LiveIntervals;
+
+ /// Spiller interface.
+ ///
+ /// Implementations are utility classes which insert spill or remat code on
+ /// demand.
+ class Spiller {
+ virtual void anchor();
+ public:
+ virtual ~Spiller() = 0;
+
+ /// spill - Spill the LRE.getParent() live interval.
+ virtual void spill(LiveRangeEdit &LRE) = 0;
+ virtual void postOptimization(){};
+ };
+
+ /// Create and return a spiller that will insert spill code directly instead
+ /// of deferring through VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+}
+
+#endif
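
A typical consumer holds the spiller for the lifetime of the allocator pass and invokes it once per live range chosen for spilling. A short usage sketch, not part of the imported file; Pass, MF, VRM and LRE are whatever the calling allocator already has in hand.

#include <memory>

std::unique_ptr<Spiller> SpillerInstance(createInlineSpiller(Pass, MF, VRM));
// Later, once a LiveRangeEdit 'LRE' has been set up for the victim register:
SpillerInstance->spill(LRE);         // Insert spill or remat code for LRE.getParent().
SpillerInstance->postOptimization(); // Optional post-spilling cleanup hook.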
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 000000000000..07be24b18dd5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1525 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SplitKit.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple, "Number of splits that were simple");
+STATISTIC(NumCopies, "Number of copies inserted for splitting");
+STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
+STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
+
+//===----------------------------------------------------------------------===//
+// Last Insert Point Analysis
+//===----------------------------------------------------------------------===//
+
+InsertPointAnalysis::InsertPointAnalysis(const LiveIntervals &lis,
+ unsigned BBNum)
+ : LIS(lis), LastInsertPoint(BBNum) {}
+
+SlotIndex
+InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
+
+ SmallVector<const MachineBasicBlock *, 1> EHPadSucessors;
+ for (const MachineBasicBlock *SMBB : MBB.successors())
+ if (SMBB->isEHPad())
+ EHPadSucessors.push_back(SMBB);
+
+ // Compute insert points on the first call. The pair is independent of the
+ // current live interval.
+ if (!LIP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm == MBB.end())
+ LIP.first = MBBEnd;
+ else
+ LIP.first = LIS.getInstructionIndex(*FirstTerm);
+
+ // If there is a landing pad successor, also find the call instruction.
+ if (EHPadSucessors.empty())
+ return LIP.first;
+ // There may not be a call instruction, in which case we ignore the landing pad.
+ LIP.second = LIP.first;
+ for (MachineBasicBlock::const_iterator I = MBB.end(), E = MBB.begin();
+ I != E;) {
+ --I;
+ if (I->isCall()) {
+ LIP.second = LIS.getInstructionIndex(*I);
+ break;
+ }
+ }
+ }
+
+ // If CurLI is live into a landing pad successor, move the last insert point
+ // back to the call that may throw.
+ if (!LIP.second)
+ return LIP.first;
+
+ if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) {
+ return LIS.isLiveInToMBB(CurLI, EHPad);
+ }))
+ return LIP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI.getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LIP.first;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LIP.second) && VNI->def < MBBEnd)
+ return LIP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow inserts before the call.
+ return LIP.second;
+}
+
+MachineBasicBlock::iterator
+InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB) {
+ SlotIndex LIP = getLastInsertPoint(CurLI, MBB);
+ if (LIP == LIS.getMBBEndIdx(&MBB))
+ return MBB.end();
+ return LIS.getInstructionFromIndex(LIP);
+}
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
+ TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
+ IPA(lis, MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = nullptr;
+ DidRepairRange = false;
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
+void SplitAnalysis::analyzeUses() {
+ assert(UseSlots.empty() && "Call clear first");
+
+ // First get all the defs from the interval values. This provides the correct
+ // slots for early clobbers.
+ for (const VNInfo *VNI : CurLI->valnos)
+ if (!VNI->isPHIDef() && !VNI->isUnused())
+ UseSlots.push_back(VNI->def);
+
+ // Get use slots from the use-def chain.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg))
+ if (!MO.isUndef())
+ UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot());
+
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+ // Remove duplicates, keeping the smaller slot for each instruction.
+ // That is what we want for early clobbers.
+ UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(),
+ SlotIndex::isSameInstr),
+ UseSlots.end());
+
+ // Compute per-live block info.
+ if (!calcLiveBlockInfo()) {
+ // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+ // I am looking at you, RegisterCoalescer!
+ DidRepairRange = true;
+ ++NumRepairs;
+ DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+ const_cast<LiveIntervals&>(LIS)
+ .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ bool fixed = calcLiveBlockInfo();
+ (void)fixed;
+ assert(fixed && "Couldn't fix broken live interval");
+ }
+
+ DEBUG(dbgs() << "Analyze counted "
+ << UseSlots.size() << " instrs in "
+ << UseBlocks.size() << " blocks, through "
+ << NumThroughBlocks << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+ ThroughBlocks.resize(MF.getNumBlockIDs());
+ NumThroughBlocks = NumGapBlocks = 0;
+ if (CurLI->empty())
+ return true;
+
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = &*MFI;
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // If the block contains no uses, the range must be live through. At one
+ // point, RegisterCoalescer could create dangling ranges that ended
+ // mid-block.
+ if (UseI == UseE || *UseI >= Stop) {
+ ++NumThroughBlocks;
+ ThroughBlocks.set(BI.MBB->getNumber());
+ // The range shouldn't end mid-block if there are no uses. This shouldn't
+ // happen.
+ if (LVI->end < Stop)
+ return false;
+ } else {
+ // This block has uses. Find the first and last uses in the block.
+ BI.FirstInstr = *UseI;
+ assert(BI.FirstInstr >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastInstr = UseI[-1];
+ assert(BI.LastInstr < Stop);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+
+ // When not live in, the first use should be a def.
+ if (!BI.LiveIn) {
+ assert(LVI->start == LVI->valno->def && "Dangling Segment start");
+ assert(LVI->start == BI.FirstInstr && "First instr should be a def");
+ BI.FirstDef = BI.FirstInstr;
+ }
+
+ // Look for gaps in the live range.
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.LiveOut = false;
+ BI.LastInstr = LastStop;
+ break;
+ }
+
+ if (LastStop < LVI->start) {
+ // There is a gap in the live range. Create duplicate entries for the
+ // live-in snippet and the live-out snippet.
+ ++NumGapBlocks;
+
+ // Push the Live-in part.
+ BI.LiveOut = false;
+ UseBlocks.push_back(BI);
+ UseBlocks.back().LastInstr = LastStop;
+
+ // Set up BI for the live-out part.
+ BI.LiveIn = false;
+ BI.LiveOut = true;
+ BI.FirstInstr = BI.FirstDef = LVI->start;
+ }
+
+ // A Segment that starts in the middle of the block must be a def.
+ assert(LVI->start == LVI->valno->def && "Dangling Segment start");
+ if (!BI.FirstDef)
+ BI.FirstDef = LVI->start;
+ }
+
+ UseBlocks.push_back(BI);
+
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
+ }
+
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
+ break;
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start)->getIterator();
+ }
+
+ assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
+ return true;
+}
+
+unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
+ if (cli->empty())
+ return 0;
+ LiveInterval *li = const_cast<LiveInterval*>(cli);
+ LiveInterval::iterator LVI = li->begin();
+ LiveInterval::iterator LVE = li->end();
+ unsigned Count = 0;
+
+ // Loop over basic blocks where li is live.
+ MachineFunction::const_iterator MFI =
+ LIS.getMBBFromIndex(LVI->start)->getIterator();
+ SlotIndex Stop = LIS.getMBBEndIdx(&*MFI);
+ for (;;) {
+ ++Count;
+ LVI = li->advanceTo(LVI, Stop);
+ if (LVI == LVE)
+ return Count;
+ do {
+ ++MFI;
+ Stop = LIS.getMBBEndIdx(&*MFI);
+ } while (Stop <= LVI->start);
+ }
+}
+
+bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ const LiveInterval &Orig = LIS.getInterval(OrigReg);
+ assert(!Orig.empty() && "Splitting empty interval?");
+ LiveInterval::const_iterator I = Orig.find(Idx);
+
+ // Range containing Idx should begin at Idx.
+ if (I != Orig.end() && I->start <= Idx)
+ return I->start == Idx;
+
+ // Range does not contain Idx, previous must end at Idx.
+ return I != Orig.begin() && (--I)->end == Idx;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ CurLI = li;
+ analyzeUses();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa,
+ LiveIntervals &lis, VirtRegMap &vrm,
+ MachineDominatorTree &mdt,
+ MachineBlockFrequencyInfo &mbfi)
+ : SA(sa), AA(aa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt),
+ TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()),
+ MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition),
+ RegAssign(Allocator) {}
+
+void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
+ Edit = &LRE;
+ SpillMode = SM;
+ OpenIdx = 0;
+ RegAssign.clear();
+ Values.clear();
+
+ // Reset the LiveRangeCalc instances needed for this spill mode.
+ LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ if (SpillMode)
+ LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit->anyRematerializable(nullptr);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
+}
+#endif
+
+VNInfo *SplitEditor::defValue(unsigned RegIdx,
+ const VNInfo *ParentVNI,
+ SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator, bool> InsP =
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
+ ValueForcePair(VNI, false)));
+
+ // This was the first time (RegIdx, ParentVNI) was mapped.
+ // Keep it as a simple def without any liveness.
+ if (InsP.second)
+ return VNI;
+
+ // If the previous value was a simple mapping, add liveness for it now.
+ if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
+ SlotIndex Def = OldVNI->def;
+ LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI));
+ // No longer a simple mapping. Switch to a complex, non-forced mapping.
+ InsP.first->second = ValueForcePair();
+ }
+
+ // This is a complex mapping, add liveness for VNI
+ SlotIndex Def = VNI->def;
+ LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
+
+ return VNI;
+}
+
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
+ assert(ParentVNI && "Mapping NULL value");
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
+ VNInfo *VNI = VFP.getPointer();
+
+ // ParentVNI was either unmapped or already complex mapped. Either way, just
+ // set the force bit.
+ if (!VNI) {
+ VFP.setInt(true);
+ return;
+ }
+
+ // This was previously a single mapping. Make sure the old def is represented
+ // by a trivial live range.
+ SlotIndex Def = VNI->def;
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+ LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
+ // Mark as complex mapped, forced.
+ VFP = ValueForcePair(nullptr, true);
+}
+
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = nullptr;
+ SlotIndex Def;
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+
+ // We may be trying to avoid interference that ends at a deleted instruction,
+ // so always begin RegIdx 0 early and all others late.
+ bool Late = RegIdx != 0;
+
+ // Attempt cheap-as-a-copy rematerialization.
+ unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
+ LiveInterval &OrigLI = LIS.getInterval(Original);
+ VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ LiveRangeEdit::Remat RM(ParentVNI);
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+
+ if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ ++NumRemats;
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit->getReg());
+ Def = LIS.getSlotIndexes()
+ ->insertMachineInstrInMaps(*CopyMI, Late)
+ .getRegSlot();
+ ++NumCopies;
+ }
+
+ // Define the value in Reg.
+ return defValue(RegIdx, ParentVNI, Def);
+}
+
+/// Create a new virtual register and live interval.
+unsigned SplitEditor::openIntv() {
+ // Create the complement as index 0.
+ if (Edit->empty())
+ Edit->createEmptyInterval();
+
+ // Create the open interval.
+ OpenIdx = Edit->size();
+ Edit->createEmptyInterval();
+ return OpenIdx;
+}
+
+void SplitEditor::selectIntv(unsigned Idx) {
+ assert(Idx != 0 && "Cannot select the complement interval");
+ assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
+ OpenIdx = Idx;
+}
+
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ std::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ SA.getLastSplitPointIter(&MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
+
+ // The interval must be live beyond the instruction at Idx.
+ SlotIndex Boundary = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Boundary.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Boundary);
+ assert(MI && "No instruction at index");
+
+ // In spill mode, make live ranges as short as possible by inserting the copy
+ // before MI. This is only possible if that instruction doesn't redefine the
+ // value. The inserted COPY is not a kill, and we don't need to recompute
+ // the source live range. The spiller also won't try to hoist this copy.
+ if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
+ MI->readsVirtualRegister(Edit->getReg())) {
+ forceRecompute(0, ParentVNI);
+ defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return Idx;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
+ std::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
+
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
+
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
+ }
+
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
+
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
+ "Parent changes value in extended range");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // The complement interval will be extended as needed by LRCalc.extend().
+ if (ParentVNI)
+ forceRecompute(0, ParentVNI);
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
+
+//===----------------------------------------------------------------------===//
+// Spill modes
+//===----------------------------------------------------------------------===//
+
+void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n");
+ RegAssignMap::iterator AssignI;
+ AssignI.setMap(RegAssign);
+
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ SlotIndex Def = Copies[i]->def;
+ MachineInstr *MI = LIS.getInstructionFromIndex(Def);
+ assert(MI && "No instruction for back-copy");
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::iterator MBBI(MI);
+ bool AtBegin;
+ do AtBegin = MBBI == MBB->begin();
+ while (!AtBegin && (--MBBI)->isDebugValue());
+
+ DEBUG(dbgs() << "Removing " << Def << '\t' << *MI);
+ LIS.removeVRegDefAt(*LI, Def);
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+
+ // Adjust RegAssign if a register assignment is killed at Def. We want to
+ // avoid calculating the live range of the source register if possible.
+ AssignI.find(Def.getPrevSlot());
+ if (!AssignI.valid() || AssignI.start() >= Def)
+ continue;
+ // If MI doesn't kill the assigned register, just leave it.
+ if (AssignI.stop() != Def)
+ continue;
+ unsigned RegIdx = AssignI.value();
+ if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
+ DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
+ forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
+ } else {
+ SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot();
+ DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
+ AssignI.setStop(Kill);
+ }
+ }
+}
+
+MachineBasicBlock*
+SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB) {
+ if (MBB == DefMBB)
+ return MBB;
+ assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def.");
+
+ const MachineLoopInfo &Loops = SA.Loops;
+ const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB);
+ MachineDomTreeNode *DefDomNode = MDT[DefMBB];
+
+ // Best candidate so far.
+ MachineBasicBlock *BestMBB = MBB;
+ unsigned BestDepth = UINT_MAX;
+
+ for (;;) {
+ const MachineLoop *Loop = Loops.getLoopFor(MBB);
+
+ // MBB isn't in a loop, it doesn't get any better. All dominators have a
+ // higher frequency by definition.
+ if (!Loop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth 0\n");
+ return MBB;
+ }
+
+ // We'll never be able to exit the DefLoop.
+ if (Loop == DefLoop) {
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " in the same loop\n");
+ return MBB;
+ }
+
+ // Least busy dominator seen so far.
+ unsigned Depth = Loop->getLoopDepth();
+ if (Depth < BestDepth) {
+ BestMBB = MBB;
+ BestDepth = Depth;
+ DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#"
+ << MBB->getNumber() << " at depth " << Depth << '\n');
+ }
+
+ // Leave loop by going to the immediate dominator of the loop header.
+ // This is a bigger stride than simply walking up the dominator tree.
+ MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom();
+
+ // Too far up the dominator tree?
+ if (!IDom || !MDT.dominates(DefDomNode, IDom))
+ return BestMBB;
+
+ MBB = IDom->getBlock();
+ }
+}
+
+void SplitEditor::computeRedundantBackCopies(
+ DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ LiveInterval *Parent = &Edit->getParent();
+ SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
+ SmallPtrSet<VNInfo *, 8> DominatedVNIs;
+
+ // Aggregate VNIs having the same value as ParentVNI.
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ EqualVNs[ParentVNI->id].insert(VNI);
+ }
+
+ // For VNI aggregation of each ParentVNI, collect dominated, i.e.,
+ // redundant VNIs to BackCopies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ if (!NotToHoistSet.count(ParentVNI->id))
+ continue;
+ SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
+ SmallPtrSetIterator<VNInfo *> It2 = It1;
+ for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) {
+ It2 = It1;
+ for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) {
+ if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2))
+ continue;
+
+ MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def);
+ MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def);
+ if (MBB1 == MBB2) {
+ DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1));
+ } else if (MDT.dominates(MBB1, MBB2)) {
+ DominatedVNIs.insert(*It2);
+ } else if (MDT.dominates(MBB2, MBB1)) {
+ DominatedVNIs.insert(*It1);
+ }
+ }
+ }
+ if (!DominatedVNIs.empty()) {
+ forceRecompute(0, ParentVNI);
+ for (auto VNI : DominatedVNIs) {
+ BackCopies.push_back(VNI);
+ }
+ DominatedVNIs.clear();
+ }
+ }
+}
+
+/// For SM_Size mode, find a common dominator for all the back-copies for
+/// the same ParentVNI and hoist the back-copies to the dominator BB.
+/// For SM_Speed mode, if the common dominator is hot and it is not beneficial
+/// to do the hoisting, simply remove the dominated back-copies for the same
+/// ParentVNI.
+void SplitEditor::hoistCopies() {
+ // Get the complement interval, always RegIdx 0.
+ LiveInterval *LI = &LIS.getInterval(Edit->get(0));
+ LiveInterval *Parent = &Edit->getParent();
+
+ // Track the nearest common dominator for all back-copies for each ParentVNI,
+ // indexed by ParentVNI->id.
+ typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair;
+ SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums());
+ // The total cost of all the back-copies for each ParentVNI.
+ SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums());
+ // The ParentVNI->id set for which hoisting back-copies are not beneficial
+ // for Speed.
+ DenseSet<unsigned> NotToHoistSet;
+
+ // Find the nearest common dominator for parent values with multiple
+ // back-copies. If a single back-copy dominates, put it in DomPair.second.
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ assert(ParentVNI && "Parent not live at complement def");
+
+ // Don't hoist remats. The complement is probably going to disappear
+ // completely anyway.
+ if (Edit->didRematerialize(ParentVNI))
+ continue;
+
+ MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def);
+
+ DomPair &Dom = NearestDom[ParentVNI->id];
+
+ // Keep directly defined parent values. This is either a PHI or an
+ // instruction in the complement range. All other copies of ParentVNI
+ // should be eliminated.
+ if (VNI->def == ParentVNI->def) {
+ DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n');
+ Dom = DomPair(ValMBB, VNI->def);
+ continue;
+ }
+ // Skip the singly mapped values. There is nothing to gain from hoisting a
+ // single back-copy.
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) {
+ DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n');
+ continue;
+ }
+
+ if (!Dom.first) {
+ // First time we see ParentVNI. VNI dominates itself.
+ Dom = DomPair(ValMBB, VNI->def);
+ } else if (Dom.first == ValMBB) {
+ // Two defs in the same block. Pick the earlier def.
+ if (!Dom.second.isValid() || VNI->def < Dom.second)
+ Dom.second = VNI->def;
+ } else {
+ // Different basic blocks. Check if one dominates.
+ MachineBasicBlock *Near =
+ MDT.findNearestCommonDominator(Dom.first, ValMBB);
+ if (Near == ValMBB)
+ // Def ValMBB dominates.
+ Dom = DomPair(ValMBB, VNI->def);
+ else if (Near != Dom.first)
+ // None dominate. Hoist to common dominator, need new def.
+ Dom = DomPair(Near, SlotIndex());
+ Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB);
+ }
+
+ DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def
+ << " for parent " << ParentVNI->id << '@' << ParentVNI->def
+ << " hoist to BB#" << Dom.first->getNumber() << ' '
+ << Dom.second << '\n');
+ }
+
+ // Insert the hoisted copies.
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
+ DomPair &Dom = NearestDom[i];
+ if (!Dom.first || Dom.second.isValid())
+ continue;
+ // This value needs a hoisted copy inserted at the end of Dom.first.
+ VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
+ // Get a less loopy dominator than Dom.first.
+ Dom.first = findShallowDominator(Dom.first, DefMBB);
+ if (SpillMode == SM_Speed &&
+ MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) {
+ NotToHoistSet.insert(ParentVNI->id);
+ continue;
+ }
+ SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
+ Dom.second =
+ defFromParent(0, ParentVNI, Last, *Dom.first,
+ SA.getLastSplitPointIter(Dom.first))->def;
+ }
+
+ // Remove redundant back-copies that are now known to be dominated by another
+ // def with the same value.
+ SmallVector<VNInfo*, 8> BackCopies;
+ for (VNInfo *VNI : LI->valnos) {
+ if (VNI->isUnused())
+ continue;
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def);
+ const DomPair &Dom = NearestDom[ParentVNI->id];
+ if (!Dom.first || Dom.second == VNI->def ||
+ NotToHoistSet.count(ParentVNI->id))
+ continue;
+ BackCopies.push_back(VNI);
+ forceRecompute(0, ParentVNI);
+ }
+
+ // If it is not beneficial to hoist all the BackCopies, simply remove
+ // redundant BackCopies in speed mode.
+ if (SpillMode == SM_Speed && !NotToHoistSet.empty())
+ computeRedundantBackCopies(NotToHoistSet, BackCopies);
+
+ removeBackCopies(BackCopies);
+}
+
+
+/// transferValues - Transfer all possible values to the new live ranges.
+/// Values that were rematerialized are left alone, they need LRCalc.extend().
+bool SplitEditor::transferValues() {
+ bool Skipped = false;
+ RegAssignMap::const_iterator AssignI = RegAssign.begin();
+ for (const LiveRange::Segment &S : Edit->getParent()) {
+ DEBUG(dbgs() << " blit " << S << ':');
+ VNInfo *ParentVNI = S.valno;
+ // RegAssign has holes where RegIdx 0 should be used.
+ SlotIndex Start = S.start;
+ AssignI.advanceTo(Start);
+ do {
+ unsigned RegIdx;
+ SlotIndex End = S.end;
+ if (!AssignI.valid()) {
+ RegIdx = 0;
+ } else if (AssignI.start() <= Start) {
+ RegIdx = AssignI.value();
+ if (AssignI.stop() < End) {
+ End = AssignI.stop();
+ ++AssignI;
+ }
+ } else {
+ RegIdx = 0;
+ End = std::min(End, AssignI.start());
+ }
+
+ // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
+ LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
+
+ // Check for a simply defined value that can be blitted directly.
+ ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
+ if (VNInfo *VNI = VFP.getPointer()) {
+ DEBUG(dbgs() << ':' << VNI->id);
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ Start = End;
+ continue;
+ }
+
+ // Skip values with forced recomputation.
+ if (VFP.getInt()) {
+ DEBUG(dbgs() << "(recalc)");
+ Skipped = true;
+ Start = End;
+ continue;
+ }
+
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+
+ // This value has multiple defs in RegIdx, but it wasn't rematerialized,
+ // so the live range is accurate. Add live-in blocks in [Start;End) to the
+ // LiveInBlocks.
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
+ SlotIndex BlockStart, BlockEnd;
+ std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB);
+
+ // The first block may be live-in, or it may have its own def.
+ if (Start != BlockStart) {
+ VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped value");
+ DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
+ // MBB has its own def. Is it also live-out?
+ if (BlockEnd <= End)
+ LRC.setLiveOutValue(&*MBB, VNI);
+
+ // Skip to the next block for live-in.
+ ++MBB;
+ BlockStart = BlockEnd;
+ }
+
+ // Handle the live-in blocks covered by [Start;End).
+ assert(Start <= BlockStart && "Expected live-in block");
+ while (BlockStart < End) {
+ DEBUG(dbgs() << ">BB#" << MBB->getNumber());
+ BlockEnd = LIS.getMBBEndIdx(&*MBB);
+ if (BlockStart == ParentVNI->def) {
+ // This block has the def of a parent PHI, so it isn't live-in.
+ assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
+ VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ assert(VNI && "Missing def for complex mapped parent PHI");
+ if (End >= BlockEnd)
+ LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
+ } else {
+ // This block needs a live-in value. The last block covered may not
+ // be live-out.
+ if (End < BlockEnd)
+ LRC.addLiveInBlock(LR, MDT[&*MBB], End);
+ else {
+ // Live-through, and we don't know the value.
+ LRC.addLiveInBlock(LR, MDT[&*MBB]);
+ LRC.setLiveOutValue(&*MBB, nullptr);
+ }
+ }
+ BlockStart = BlockEnd;
+ ++MBB;
+ }
+ Start = End;
+ } while (Start != S.end);
+ DEBUG(dbgs() << '\n');
+ }
+
+ LRCalc[0].calculateValues();
+ if (SpillMode)
+ LRCalc[1].calculateValues();
+
+ return Skipped;
+}
+
+void SplitEditor::extendPHIKillRanges() {
+ // Extend live ranges to be live-out for successor PHI values.
+ for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
+
+ // Check whether PHI is dead.
+ const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def);
+ assert(Segment != nullptr && "Missing segment for VNI");
+ if (Segment->end == PHIVNI->def.getDeadSlot()) {
+ // This is a dead PHI. Remove it.
+ LR.removeSegment(*Segment, true);
+ continue;
+ }
+
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (Edit->getParent().liveAt(LastUse)) {
+ assert(RegAssign.lookup(LastUse) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ LRC.extend(LR, End);
+ }
+ }
+ }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = *RI;
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ MO.setReg(0);
+ continue;
+ }
+
+ // <undef> operands don't really read the register, so it doesn't matter
+ // which register we choose. When the use operand is tied to a def, we must
+ // use the same register as the def, so just do that always.
+ SlotIndex Idx = LIS.getInstructionIndex(*MI);
+ if (MO.isDef() || MO.isUndef())
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
+ MO.setReg(LI->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx if the instruction reads reg.
+ if (!ExtendRanges || MO.isUndef())
+ continue;
+
+ // Skip instructions that don't read Reg.
+ if (MO.isDef()) {
+ if (!MO.getSubReg() && !MO.isEarlyClobber())
+ continue;
+ // We may want to extend a live range for a partial redef, or for a use
+ // tied to an early clobber.
+ Idx = Idx.getPrevSlot();
+ if (!Edit->getParent().liveAt(Idx))
+ continue;
+ } else
+ Idx = Idx.getRegSlot(true);
+
+ getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot());
+ }
+}
+
+void SplitEditor::deleteRematVictims() {
+ SmallVector<MachineInstr*, 8> Dead;
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){
+ LiveInterval *LI = &LIS.getInterval(*I);
+ for (const LiveRange::Segment &S : LI->segments) {
+ // Dead defs end at the dead slot.
+ if (S.end != S.valno->def.getDeadSlot())
+ continue;
+ if (S.valno->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
+ assert(MI && "Missing instruction for dead def");
+ MI->addRegisterDead(LI->reg, &TRI);
+
+ if (!MI->allDefsAreDead())
+ continue;
+
+ DEBUG(dbgs() << "All defs dead: " << *MI);
+ Dead.push_back(MI);
+ }
+ }
+
+ if (Dead.empty())
+ return;
+
+ Edit->eliminateDeadDefs(Dead, None, &AA);
+}
+
+void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
+ ++NumFinished;
+
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
+
+ // Add the original defs from the parent interval.
+ for (const VNInfo *ParentVNI : Edit->getParent().valnos) {
+ if (ParentVNI->isUnused())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+ defValue(RegIdx, ParentVNI, ParentVNI->def);
+
+ // Force rematted values to be recomputed everywhere.
+ // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI))
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ forceRecompute(i, ParentVNI);
+ }
+
+ // Hoist back-copies to the complement interval when in spill mode.
+ switch (SpillMode) {
+ case SM_Partition:
+ // Leave all back-copies as is.
+ break;
+ case SM_Size:
+ case SM_Speed:
+ // hoistCopies will behave differently between size and speed.
+ hoistCopies();
+ }
+
+ // Transfer the simply mapped values, check if any are skipped.
+ bool Skipped = transferValues();
+
+ // Rewrite virtual registers, possibly extending ranges.
+ rewriteAssigned(Skipped);
+
+ if (Skipped)
+ extendPHIKillRanges();
+ else
+ ++NumSimple;
+
+ // Delete defs that were rematted everywhere.
+ if (Skipped)
+ deleteRematVictims();
+
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) {
+ LiveInterval &LI = LIS.getInterval(*I);
+ LI.RenumberValues();
+ }
+
+ // Provide a reverse mapping from original indices to Edit ranges.
+ if (LRMap) {
+ LRMap->clear();
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i)
+ LRMap->push_back(i);
+ }
+
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ unsigned VReg = Edit->get(i);
+ LiveInterval &LI = LIS.getInterval(VReg);
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ LIS.splitSeparateComponents(LI, SplitLIs);
+ unsigned Original = VRM.getOriginal(VReg);
+ for (LiveInterval *SplitLI : SplitLIs)
+ VRM.setIsSplitFromReg(SplitLI->reg, Original);
+
+ // The new intervals all map back to i.
+ if (LRMap)
+ LRMap->resize(Edit->size(), i);
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI);
+
+ assert(!LRMap || LRMap->size() == Edit->size());
+}
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
+ bool SingleInstrs) const {
+ // Always split for multiple instructions.
+ if (!BI.isOneInstr())
+ return true;
+ // Don't split for single instructions unless explicitly requested.
+ if (!SingleInstrs)
+ return false;
+ // Splitting a live-through range always makes progress.
+ if (BI.LiveIn && BI.LiveOut)
+ return true;
+ // No point in isolating a copy. It has no register class constraints.
+ if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ return false;
+ // Finally, don't isolate an end point that was created by earlier splits.
+ return isOriginalEndpoint(BI.FirstInstr);
+}
+
+void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) {
+ openIntv();
+ SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber());
+ SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr,
+ LastSplitPoint));
+ if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastInstr));
+ } else {
+ // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastInstr);
+ }
+}
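
Taken together with shouldSplitSingleBlock(), this gives the usual per-block splitting loop. A hedged sketch of how an allocator might drive it, not part of the imported file; SA, SE, LREdit and VirtReg are the caller's SplitAnalysis, SplitEditor, LiveRangeEdit and live interval, and getUseBlocks() is assumed to be the accessor SplitAnalysis exposes for its UseBlocks array.

SA.analyze(&VirtReg);
SE.reset(LREdit, SplitEditor::SM_Size);
for (const SplitAnalysis::BlockInfo &BI : SA.getUseBlocks())
  if (SA.shouldSplitSingleBlock(BI, /*SingleInstrs=*/false))
    SE.splitSingleBlock(BI);
if (!LREdit.empty()) {
  SmallVector<unsigned, 8> IntvMap;   // Maps the new intervals back to openIntv indexes.
  SE.finish(&IntvMap);
}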
+
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a method of global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
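For the live-through case handled next, the caller typically turns the per-edge-bundle decision produced by SpillPlacement into per-block calls. A rough sketch, not part of the imported file; RegBundles is the bit vector filled in by SpillPlacement::finish(), Bundles an EdgeBundles analysis, and MainIntv, LeaveBefore and EnterAfter are placeholders for the candidate interval index and its interference bounds.

for (unsigned Number = 0, E = MF.getNumBlockIDs(); Number != E; ++Number) {
  bool RegIn  = RegBundles.test(Bundles.getBundle(Number, /*Out=*/false));
  bool RegOut = RegBundles.test(Bundles.getBundle(Number, /*Out=*/true));
  if (!RegIn && !RegOut)
    continue;                 // Live-through on the stack; nothing to insert here.
  SE.splitLiveThroughBlock(Number,
                           RegIn  ? MainIntv : 0, LeaveBefore,
                           RegOut ? MainIntv : 0, EnterAfter);
}
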
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block");
+ assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf");
+ assert((!EnterAfter || EnterAfter >= Start) && "Interference before block");
+
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+ assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf");
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx;
+ if (LeaveBefore && LeaveBefore < LSP) {
+ Idx = enterIntvBefore(LeaveBefore);
+ useIntv(Idx, Stop);
+ } else {
+ Idx = enterIntvAtEnd(*MBB);
+ }
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastInstr);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastInstr < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastInstr);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastInstr < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastInstr);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastInstr);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstInstr, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr));
+ useIntv(From, Idx);
+}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
new file mode 100644
index 000000000000..a9684942885e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -0,0 +1,503 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H
+#define LLVM_LIB_CODEGEN_SPLITKIT_H
+
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class ConnectedVNInfoEqClasses;
+class LiveInterval;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineBlockFrequencyInfo;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VNInfo;
+class raw_ostream;
+
+/// Determines the latest safe point in a block in which we can insert a split,
+/// spill or other instruction related to CurLI.
+class LLVM_LIBRARY_VISIBILITY InsertPointAnalysis {
+private:
+ const LiveIntervals &LIS;
+
+ /// Last legal insert point in each basic block in the current function.
+ /// The first entry is the first terminator, the second entry is the
+ /// last valid point to insert a split or spill for a variable that is
+ /// live into a landing pad successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastInsertPoint;
+
+ SlotIndex computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB);
+
+public:
+ InsertPointAnalysis(const LiveIntervals &lis, unsigned BBNum);
+
+ /// Return the base index of the last valid insert point for \p CurLI in \p MBB.
+ SlotIndex getLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ // Inline the common simple case.
+ if (LastInsertPoint[Num].first.isValid() &&
+ !LastInsertPoint[Num].second.isValid())
+ return LastInsertPoint[Num].first;
+ return computeLastInsertPoint(CurLI, MBB);
+ }
+
+  /// Returns the last insert point as an iterator for \p CurLI in \p MBB.
+ MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB);
+};
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
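+///
+/// A rough usage sketch (hypothetical; VRM, LIS, Loops and LI stand for the
+/// caller's VirtRegMap, LiveIntervals, MachineLoopInfo and LiveInterval):
+///
+///   SplitAnalysis SA(VRM, LIS, Loops);
+///   SA.analyze(&LI);
+///   for (const SplitAnalysis::BlockInfo &BI : SA.getUseBlocks())
+///     if (SA.shouldSplitSingleBlock(BI, /*SingleInstrs=*/false)) {
+///       // BI.MBB is a candidate for a single-block split.
+///     }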
+class LLVM_LIBRARY_VISIBILITY SplitAnalysis {
+public:
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks.
+ ///
+ /// Two BlockInfo entries are created for template 4. One for the live-in
+ /// segment, and one for the live-out segment. These entries look as if the
+ /// block were split in the middle where the live range isn't live.
+ ///
+ /// Live-through blocks without any uses don't get BlockInfo entries. They
+ /// are simply listed in ThroughBlocks instead.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstInstr; ///< First instr accessing current reg.
+ SlotIndex LastInstr; ///< Last instr accessing current reg.
+ SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex().
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstInstr, LastInstr);
+ }
+ };
+
+private:
+ // Current live interval.
+ const LiveInterval *CurLI;
+
+ /// Insert Point Analysis.
+ InsertPointAnalysis IPA;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
+ /// UseBlocks - Blocks where CurLI has uses.
+ SmallVector<BlockInfo, 8> UseBlocks;
+
+ /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
+ /// the live range has a gap.
+ unsigned NumGapBlocks;
+
+ /// ThroughBlocks - Block numbers where CurLI is live through without uses.
+ BitVector ThroughBlocks;
+
+ /// NumThroughBlocks - Number of live-through blocks.
+ unsigned NumThroughBlocks;
+
+ /// DidRepairRange - analyze was forced to shrinkToUses().
+ bool DidRepairRange;
+
+  // Summarize statistics by counting instructions using CurLI.
+ void analyzeUses();
+
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ bool calcLiveBlockInfo();
+
+public:
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
+ /// by analyze(). This really shouldn't happen, but sometimes the coalescer
+ /// can create live ranges that end in mid-air.
+ bool didRepairRange() const { return DidRepairRange; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
+
+ /// isOriginalEndpoint - Return true if the original live range was killed or
+ /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+ /// and 'use' for an early-clobber def.
+ /// This can be used to recognize code inserted by earlier live range
+ /// splitting.
+ bool isOriginalEndpoint(SlotIndex Idx) const;
+
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+  /// This includes both use and def operands, at most one entry per instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
+ /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
+ /// where CurLI has uses.
+ ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
+
+ /// getNumThroughBlocks - Return the number of through blocks.
+ unsigned getNumThroughBlocks() const { return NumThroughBlocks; }
+
+ /// isThroughBlock - Return true if CurLI is live through MBB without uses.
+ bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); }
+
+ /// getThroughBlocks - Return the set of through blocks.
+ const BitVector &getThroughBlocks() const { return ThroughBlocks; }
+
+ /// getNumLiveBlocks - Return the number of blocks where CurLI is live.
+ unsigned getNumLiveBlocks() const {
+ return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks();
+ }
+
+ /// countLiveBlocks - Return the number of blocks where li is live. This is
+ /// guaranteed to return the same number as getNumLiveBlocks() after calling
+ /// analyze(li).
+ unsigned countLiveBlocks(const LiveInterval *li) const;
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+
+ /// shouldSplitSingleBlock - Returns true if it would help to create a local
+ /// live range for the instructions in BI. There is normally no benefit to
+ /// creating a live range for a single instruction, but it does enable
+ /// register class inflation if the instruction has a restricted register
+ /// class.
+ ///
+ /// @param BI The block to be isolated.
+ /// @param SingleInstrs True when single instructions should be isolated.
+ bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const;
+
+ SlotIndex getLastSplitPoint(unsigned Num) {
+ return IPA.getLastInsertPoint(*CurLI, *MF.getBlockNumbered(Num));
+ }
+
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {
+ return IPA.getLastInsertPointIter(*CurLI, *BB);
+ }
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Repeat from openIntv for each additional interval.
+/// - Rewrite instructions with finish().
+///
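+/// A minimal sketch of that sequence (hypothetical; SA, AA, LIS, VRM, MDT,
+/// MBFI, LREdit, UseIdx and LastUseIdx stand for caller-provided objects):
+///
+///   SplitEditor SE(SA, AA, LIS, VRM, MDT, MBFI);
+///   SE.reset(LREdit);
+///   SE.openIntv();
+///   SlotIndex Start = SE.enterIntvBefore(UseIdx);
+///   SlotIndex Stop = SE.leaveIntvAfter(LastUseIdx);
+///   SE.useIntv(Start, Stop);
+///   SE.finish();
+///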
+class LLVM_LIBRARY_VISIBILITY SplitEditor {
+ SplitAnalysis &SA;
+ AliasAnalysis &AA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+public:
+
+ /// ComplementSpillMode - Select how the complement live range should be
+ /// created. SplitEditor automatically creates interval 0 to contain
+ /// anything that isn't added to another interval. This complement interval
+ /// can get quite complicated, and it can sometimes be an advantage to allow
+ /// it to overlap the other intervals. If it is going to spill anyway, no
+ /// registers are wasted by keeping a value in two places at the same time.
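+  /// For example (hypothetical), a caller that expects the complement
+  /// interval to be spilled anyway might pass an overlapping mode, e.g.
+  /// reset(LREdit, SM_Speed), instead of relying on the default SM_Partition.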
+ enum ComplementSpillMode {
+    /// SM_Partition (default) - Try to create the complement interval so it
+ /// doesn't overlap any other intervals, and the original interval is
+ /// partitioned. This may require a large number of back copies and extra
+ /// PHI-defs. Only segments marked with overlapIntv will be overlapping.
+ SM_Partition,
+
+ /// SM_Size - Overlap intervals to minimize the number of inserted COPY
+ /// instructions. Copies to the complement interval are hoisted to their
+ /// common dominator, so only one COPY is required per value in the
+ /// complement interval. This also means that no extra PHI-defs need to be
+ /// inserted in the complement interval.
+ SM_Size,
+
+ /// SM_Speed - Overlap intervals to minimize the expected execution
+ /// frequency of the inserted copies. This is very similar to SM_Size, but
+ /// the complement interval may get some extra PHI-defs.
+ SM_Speed
+ };
+
+private:
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit *Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ /// The current spill mode, selected by reset().
+ ComplementSpillMode SpillMode;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
+
+ typedef PointerIntPair<VNInfo*, 1> ValueForcePair;
+ typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap;
+
+ /// Values - keep track of the mapping from parent values to values in the new
+ /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+ ///
+ /// 1. No entry - the value is not mapped to Edit.get(RegIdx).
+ /// 2. (Null, false) - the value is mapped to multiple values in
+ /// Edit.get(RegIdx). Each value is represented by a minimal live range at
+ /// its def. The full live range can be inferred exactly from the range
+ /// of RegIdx in RegAssign.
+ /// 3. (Null, true). As above, but the ranges in RegAssign are too large, and
+ /// the live range must be recomputed using LiveRangeCalc::extend().
+ /// 4. (VNI, false) The value is mapped to a single new value.
+ /// The new value has no live ranges anywhere.
+ ValueMap Values;
+
+ /// LRCalc - Cache for computing live ranges and SSA update. Each instance
+ /// can only handle non-overlapping live ranges, so use a separate
+ /// LiveRangeCalc instance for the complement interval when in spill mode.
+ LiveRangeCalc LRCalc[2];
+
+ /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the
+ /// complement interval can overlap the other intervals, so it gets its own
+ /// LRCalc instance. When not in spill mode, all intervals can share one.
+ LiveRangeCalc &getLRCalc(unsigned RegIdx) {
+ return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
+ }
+
+ /// defValue - define a value in RegIdx from ParentVNI at Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in ParentLI. The new value is added to the value
+ /// map.
+ /// Return the new LI value.
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// forceRecompute - Force the live range of ParentVNI in RegIdx to be
+ /// recomputed by LiveRangeCalc::extend regardless of the number of defs.
+ /// This is used for values whose live range doesn't match RegAssign exactly.
+ /// They could have rematerialized, or back-copies may have been moved.
+ void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+  /// removeBackCopies - Remove the copy instructions that define the values
+  /// in the vector in the complement interval.
+ void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies);
+
+  /// findShallowDominator - Returns the least busy dominator of MBB that is
+ /// also dominated by DefMBB. Busy is measured by loop depth.
+ MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB);
+
+ /// Find out all the backCopies dominated by others.
+ void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet,
+ SmallVectorImpl<VNInfo *> &BackCopies);
+
+  /// Hoist back-copies to the complement interval. It tries to hoist all
+  /// the back-copies to one BB if that is beneficial, or else simply removes
+  /// redundant back-copies dominated by others.
+ void hoistCopies();
+
+ /// transferValues - Transfer values to the new ranges.
+ /// Return true if any ranges were skipped.
+ bool transferValues();
+
+ /// extendPHIKillRanges - Extend the ranges of all values killed by original
+ /// parent PHIDefs.
+ void extendPHIKillRanges();
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned(bool ExtendRanges);
+
+ /// deleteRematVictims - Delete defs that are dead after rematerializing.
+ void deleteRematVictims();
+
+public:
+  /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+  /// Newly created intervals will be appended to the LiveRangeEdit passed to
+  /// reset().
+ SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, LiveIntervals&,
+ VirtRegMap&, MachineDominatorTree&,
+ MachineBlockFrequencyInfo &);
+
+ /// reset - Prepare for a new split.
+ void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition);
+
+ /// Create a new virtual register and live interval.
+ /// Return the interval index, starting from 1. Interval index 0 is the
+ /// implicit complement interval.
+ unsigned openIntv();
+
+ /// currentIntv - Return the current interval index.
+ unsigned currentIntv() const { return OpenIdx; }
+
+ /// selectIntv - Select a previously opened interval index.
+ void selectIntv(unsigned Idx);
+
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+ /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+  /// useIntv - indicate that all instructions in MBB should use the open
+  /// interval.
+ void useIntv(const MachineBasicBlock &MBB);
+
+  /// useIntv - indicate that all instructions in range should use the open
+  /// interval.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
+
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ /// @param LRMap When not null, this vector will map each live range in Edit
+ /// back to the indices returned by openIntv.
+ /// There may be extra indices created by dead code elimination.
+ void finish(SmallVectorImpl<unsigned> *LRMap = nullptr);
+
+ /// dump - print the current interval mapping to dbgs().
+ void dump() const;
+
+ // ===--- High level methods ---===
+
+ /// splitSingleBlock - Split CurLI into a separate live interval around the
+ /// uses in a single block. This is intended to be used as part of a larger
+ /// split, and doesn't call finish().
+ void splitSingleBlock(const SplitAnalysis::BlockInfo &BI);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 000000000000..87cd470d5690
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,1126 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+// offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+// spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stackcoloring"
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+ cl::init(false), cl::Hidden,
+ cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user. This option
+/// is treated as overriding LifetimeStartOnFirstUse below.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+ cl::init(false), cl::Hidden,
+ cl::desc("Do not optimize lifetime zones that "
+ "are broken"));
+
+/// Enable enhanced dataflow scheme for lifetime analysis (treat first
+/// use of stack slot as start of slot lifetime, as opposed to looking
+/// for LIFETIME_START marker). See "Implementation notes" below for
+/// more info.
+static cl::opt<bool>
+LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
+ cl::init(true), cl::Hidden,
+ cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
+
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+
+//
+// Implementation Notes:
+// ---------------------
+//
+// Consider the following motivating example:
+//
+// int foo() {
+// char b1[1024], b2[1024];
+// if (...) {
+// char b3[1024];
+// <uses of b1, b3>;
+// return x;
+// } else {
+// char b4[1024], b5[1024];
+// <uses of b2, b4, b5>;
+// return y;
+// }
+// }
+//
+// In the code above, "b3" and "b4" are declared in distinct lexical
+// scopes, meaning that it is easy to prove that they can share the
+// same stack slot. Variables "b1" and "b2" are declared in the same
+// scope, meaning that from a lexical point of view, their lifetimes
+// overlap. From a control flow point of view, however, the two
+// variables are accessed in disjoint regions of the CFG, thus it
+// should be possible for them to share the same stack slot. An ideal
+// stack allocation for the function above would look like:
+//
+// slot 0: b1, b2
+// slot 1: b3, b4
+// slot 2: b5
+//
+// Achieving this allocation is tricky, however, due to the way
+// lifetime markers are inserted. Here is a simplified view of the
+// control flow graph for the code above:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------ block 2 -------+
+// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
+// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
+// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
+// +-----------------------+ +-----------------------+
+// \. /.
+// +------ block 3 -------+
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +-----------------------+
+//
+// If we create live intervals for the variables above strictly based
+// on the lifetime markers, we'll get the set of intervals on the
+// left. If we ignore the lifetime start markers and instead treat a
+// variable's lifetime as beginning with the first reference to the
+// var, then we get the intervals on the right.
+//
+// LIFETIME_START First Use
+// b1: [0,9] [3,4] [8,9]
+// b2: [0,9] [6,9]
+// b3: [2,4] [3,4]
+// b4: [5,7] [6,7]
+// b5: [5,7] [6,7]
+//
+// For the intervals on the left, the best we can do is overlap two
+// variables (b3 and b4, for example); this gives us a stack size of
+// 4*1024 bytes, not ideal. When treating first-use as the start of a
+// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+// byte stack (better).
+//
+// Relying entirely on first-use of stack slots is problematic,
+// however, due to the fact that optimizations can sometimes migrate
+// uses of a variable outside of its lifetime start/end region. Here
+// is an example:
+//
+// int bar() {
+// char b1[1024], b2[1024];
+// if (...) {
+// <uses of b2>
+// return y;
+// } else {
+// <uses of b1>
+// while (...) {
+// char b3[1024];
+// <uses of b3>
+// }
+// }
+// }
+//
+// Before optimization, the control flow graph for the code above
+// might look like the following:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------- block 2 -------+
+// 2| <uses of b2> | 3| <uses of b1> |
+// +-----------------------+ +-----------------------+
+// | |
+// | +------- block 3 -------+ <-\.
+// | 4| <while condition> | |
+// | +-----------------------+ |
+// | / | |
+// | / +------- block 4 -------+
+// \ / 5| LIFETIME_START b3 | |
+// \ / 6| <uses of b3> | |
+// \ / 7| LIFETIME_END b3 | |
+// \ | +------------------------+ |
+// \ | \ /
+// +------ block 5 -----+ \---------------
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +---------------------+
+//
+// During optimization, however, it can happen that an instruction
+// computing an address in "b3" (for example, a loop-invariant GEP) is
+// hoisted up out of the loop from block 4 to block 2. [Note that
+// this is not an actual load from the stack, only an instruction that
+// computes the address to be loaded]. If this happens, there is now a
+// path leading from the first use of b3 to the return instruction
+// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
+// now larger than if we were computing live intervals strictly based
+// on lifetime markers. In the example above, this lengthened lifetime
+// would mean that it would appear illegal to overlap b3 with b2.
+//
+// To deal with such cases, the code in ::collectMarkers() below
+// tries to identify "degenerate" slots -- those slots where on a single
+// forward pass through the CFG we encounter a first reference to slot
+// K before we hit the slot K lifetime start marker. For such slots,
+// we fall back on using the lifetime start marker as the beginning of
+// the variable's lifetime. NB: with this implementation, slots can
+// appear degenerate in cases where there is unstructured control flow:
+//
+// if (q) goto mid;
+// if (x > 9) {
+// int b[100];
+// memcpy(&b[0], ...);
+// mid: b[k] = ...;
+// abc(&b);
+// }
+//
+// If, in the RPO ordering chosen to walk the CFG, we happen to visit the b[k]
+// assignment before visiting the memcpy block (which will contain the
+// lifetime start for "b"), then it will appear that 'b' has a degenerate
+// lifetime.
+//
+
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+ MachineFrameInfo *MFI;
+ MachineFunction *MF;
+
+ /// A class representing liveness information for a single basic block.
+ /// Each bit in the BitVector represents the liveness property
+ /// for a different stack slot.
+ struct BlockLifetimeInfo {
+    /// Which slots BEGIN in each basic block.
+    BitVector Begin;
+    /// Which slots END in each basic block.
+    BitVector End;
+ /// Which slots are marked as LIVE_IN, coming into each basic block.
+ BitVector LiveIn;
+ /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+ BitVector LiveOut;
+ };
+
+ /// Maps active slots (per bit) for each basic block.
+ typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+ LivenessMap BlockLiveness;
+
+  /// Maps basic blocks to their serial number.
+  DenseMap<const MachineBasicBlock*, int> BasicBlocks;
+  /// Maps serial numbers to basic blocks.
+  SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
+
+ /// Maps liveness intervals for each slot.
+ SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+ /// VNInfo is used for the construction of LiveIntervals.
+ VNInfo::Allocator VNInfoAllocator;
+ /// SlotIndex analysis object.
+ SlotIndexes *Indexes;
+ /// The stack protector object.
+ StackProtector *SP;
+
+ /// The list of lifetime markers found. These markers are to be removed
+ /// once the coloring is done.
+ SmallVector<MachineInstr*, 8> Markers;
+
+ /// Record the FI slots for which we have seen some sort of
+ /// lifetime marker (either start or end).
+ BitVector InterestingSlots;
+
+ /// FI slots that need to be handled conservatively (for these
+ /// slots lifetime-start-on-first-use is disabled).
+ BitVector ConservativeSlots;
+
+ /// Number of iterations taken during data flow analysis.
+ unsigned NumIterations;
+
+public:
+ static char ID;
+ StackColoring() : MachineFunctionPass(ID) {
+ initializeStackColoringPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// Debug.
+ void dump() const;
+ void dumpIntervals() const;
+ void dumpBB(MachineBasicBlock *MBB) const;
+ void dumpBV(const char *tag, const BitVector &BV) const;
+
+ /// Removes all of the lifetime marker instructions from the function.
+ /// \returns true if any markers were removed.
+ bool removeAllMarkers();
+
+ /// Scan the machine function and find all of the lifetime markers.
+ /// Record the findings in the BEGIN and END vectors.
+ /// \returns the number of markers found.
+ unsigned collectMarkers(unsigned NumSlot);
+
+  /// Perform the dataflow calculation and calculate the lifetime for each of
+  /// the slots, based on the BEGIN/END vectors. Set the LiveIn and LiveOut
+  /// sets that represent which stack slots are live coming into and out of
+  /// each basic block.
+ void calculateLocalLiveness();
+
+ /// Returns TRUE if we're using the first-use-begins-lifetime method for
+ /// this slot (if FALSE, then the start marker is treated as start of lifetime).
+ bool applyFirstUse(int Slot) {
+ if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
+ return false;
+ if (ConservativeSlots.test(Slot))
+ return false;
+ return true;
+ }
+
+ /// Examines the specified instruction and returns TRUE if the instruction
+ /// represents the start or end of an interesting lifetime. The slot or slots
+ /// starting or ending are added to the vector "slots" and "isStart" is set
+ /// accordingly.
+ /// \returns True if inst contains a lifetime start or end
+ bool isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart);
+
+ /// Construct the LiveIntervals for the slots.
+ void calculateLiveIntervals(unsigned NumSlots);
+
+ /// Go over the machine function and change instructions which use stack
+ /// slots to use the joint slots.
+ void remapInstructions(DenseMap<int, int> &SlotRemap);
+
+ /// The input program may contain instructions which are not inside lifetime
+ /// markers. This can happen due to a bug in the compiler or due to a bug in
+ /// user code (for example, returning a reference to a local variable).
+ /// This procedure checks all of the instructions in the function and
+ /// invalidates lifetime ranges which do not contain all of the instructions
+ /// which access that frame slot.
+ void removeInvalidSlotRanges();
+
+ /// Map entries which point to other entries to their destination.
+ /// A->B->C becomes A->C.
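+  /// For example (informally), {1 -> 3, 3 -> 5} becomes {1 -> 5, 3 -> 5}.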
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
+
+ /// Used in collectMarkers
+ typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap;
+};
+} // end anonymous namespace
+
+char StackColoring::ID = 0;
+char &llvm::StackColoringID = StackColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_END(StackColoring,
+ "stack-coloring", "Merge disjoint stack slots", false, false)
+
+void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<SlotIndexes>();
+ AU.addRequired<StackProtector>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+#ifndef NDEBUG
+
+LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
+ const BitVector &BV) const {
+ DEBUG(dbgs() << tag << " : { ");
+ for (unsigned I = 0, E = BV.size(); I != E; ++I)
+ DEBUG(dbgs() << BV.test(I) << " ");
+ DEBUG(dbgs() << "}\n");
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
+ LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ const BlockLifetimeInfo &BlockInfo = BI->second;
+
+ dumpBV("BEGIN", BlockInfo.Begin);
+ dumpBV("END", BlockInfo.End);
+ dumpBV("LIVE_IN", BlockInfo.LiveIn);
+ dumpBV("LIVE_OUT", BlockInfo.LiveOut);
+}
+
+LLVM_DUMP_METHOD void StackColoring::dump() const {
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+ DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
+ << MBB->getName() << "]\n");
+ DEBUG(dumpBB(MBB));
+ }
+}
+
+LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
+ for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
+ DEBUG(dbgs() << "Interval[" << I << "]:\n");
+ DEBUG(Intervals[I]->dump());
+ }
+}
+
+#endif // not NDEBUG
+
+static inline int getStartOrEndSlot(const MachineInstr &MI)
+{
+ assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
+ "Expected LIFETIME_START or LIFETIME_END op");
+ const MachineOperand &MO = MI.getOperand(0);
+ int Slot = MO.getIndex();
+ if (Slot >= 0)
+ return Slot;
+ return -1;
+}
+
+//
+// At the moment the only way to end a variable lifetime is with
+// a LIFETIME_END op (which can't contain a start). If things
+// change and the IR allows for a single inst that both begins
+// and ends lifetime(s), this interface will need to be reworked.
+//
+bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart)
+{
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ int Slot = getStartOrEndSlot(MI);
+ if (Slot < 0)
+ return false;
+ if (!InterestingSlots.test(Slot))
+ return false;
+ slots.push_back(Slot);
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ isStart = false;
+ return true;
+ }
+ if (! applyFirstUse(Slot)) {
+ isStart = true;
+ return true;
+ }
+ } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) {
+ if (! MI.isDebugValue()) {
+ bool found = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int Slot = MO.getIndex();
+ if (Slot<0)
+ continue;
+ if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
+ slots.push_back(Slot);
+ found = true;
+ }
+ }
+ if (found) {
+ isStart = true;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+unsigned StackColoring::collectMarkers(unsigned NumSlot)
+{
+ unsigned MarkersFound = 0;
+ BlockBitVecMap SeenStartMap;
+ InterestingSlots.clear();
+ InterestingSlots.resize(NumSlot);
+ ConservativeSlots.clear();
+ ConservativeSlots.resize(NumSlot);
+
+ // number of start and end lifetime ops for each slot
+ SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
+ SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
+
+ // Step 1: collect markers and populate the "InterestingSlots"
+ // and "ConservativeSlots" sets.
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+
+ // Compute the set of slots for which we've seen a START marker but have
+ // not yet seen an END marker at this point in the walk (e.g. on entry
+ // to this bb).
+ BitVector BetweenStartEnd;
+ BetweenStartEnd.resize(NumSlot);
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI);
+ if (I != SeenStartMap.end()) {
+ BetweenStartEnd |= I->second;
+ }
+ }
+
+ // Walk the instructions in the block to look for start/end ops.
+ for (MachineInstr &MI : *MBB) {
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
+ MI.getOpcode() == TargetOpcode::LIFETIME_END) {
+ int Slot = getStartOrEndSlot(MI);
+ if (Slot < 0)
+ continue;
+ InterestingSlots.set(Slot);
+ if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
+ BetweenStartEnd.set(Slot);
+ NumStartLifetimes[Slot] += 1;
+ } else {
+ BetweenStartEnd.reset(Slot);
+ NumEndLifetimes[Slot] += 1;
+ }
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs() << "Found a lifetime ");
+ DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
+ ? "start"
+ : "end"));
+ DEBUG(dbgs() << " marker for slot #" << Slot);
+ DEBUG(dbgs() << " with allocation: " << Allocation->getName()
+ << "\n");
+ }
+ Markers.push_back(&MI);
+ MarkersFound += 1;
+ } else {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI())
+ continue;
+ int Slot = MO.getIndex();
+ if (Slot < 0)
+ continue;
+ if (! BetweenStartEnd.test(Slot)) {
+ ConservativeSlots.set(Slot);
+ }
+ }
+ }
+ }
+ BitVector &SeenStart = SeenStartMap[MBB];
+ SeenStart |= BetweenStartEnd;
+ }
+ if (!MarkersFound) {
+ return 0;
+ }
+
+ // PR27903: slots with multiple start or end lifetime ops are not
+ // safe to enable for "lifetime-start-on-first-use".
+ for (unsigned slot = 0; slot < NumSlot; ++slot)
+ if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
+ ConservativeSlots.set(slot);
+ DEBUG(dumpBV("Conservative slots", ConservativeSlots));
+
+ // Step 2: compute begin/end sets for each block
+
+ // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
+ // deterministic numbering, and because we'll need a post-order iteration
+ // later for solving the liveness dataflow problem.
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+
+ // Assign a serial number to this basic block.
+ BasicBlocks[MBB] = BasicBlockNumbering.size();
+ BasicBlockNumbering.push_back(MBB);
+
+ // Keep a reference to avoid repeated lookups.
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
+
+ BlockInfo.Begin.resize(NumSlot);
+ BlockInfo.End.resize(NumSlot);
+
+ SmallVector<int, 4> slots;
+ for (MachineInstr &MI : *MBB) {
+ bool isStart = false;
+ slots.clear();
+ if (isLifetimeStartOrEnd(MI, slots, isStart)) {
+ if (!isStart) {
+ assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
+ int Slot = slots[0];
+ if (BlockInfo.Begin.test(Slot)) {
+ BlockInfo.Begin.reset(Slot);
+ }
+ BlockInfo.End.set(Slot);
+ } else {
+ for (auto Slot : slots) {
+ DEBUG(dbgs() << "Found a use of slot #" << Slot);
+ DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index ");
+ DEBUG(Indexes->getInstructionIndex(MI).print(dbgs()));
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
+ if (Allocation) {
+ DEBUG(dbgs() << " with allocation: "<< Allocation->getName());
+ }
+ DEBUG(dbgs() << "\n");
+ if (BlockInfo.End.test(Slot)) {
+ BlockInfo.End.reset(Slot);
+ }
+ BlockInfo.Begin.set(Slot);
+ }
+ }
+ }
+ }
+ }
+
+ // Update statistics.
+ NumMarkerSeen += MarkersFound;
+ return MarkersFound;
+}
+
+void StackColoring::calculateLocalLiveness()
+{
+ unsigned NumIters = 0;
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ ++NumIters;
+
+ for (const MachineBasicBlock *BB : BasicBlockNumbering) {
+
+ // Use an iterator to avoid repeated lookups.
+ LivenessMap::iterator BI = BlockLiveness.find(BB);
+ assert(BI != BlockLiveness.end() && "Block not found");
+ BlockLifetimeInfo &BlockInfo = BI->second;
+
+ // Compute LiveIn by unioning together the LiveOut sets of all preds.
+ BitVector LocalLiveIn;
+ for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
+ PE = BB->pred_end(); PI != PE; ++PI) {
+ LivenessMap::const_iterator I = BlockLiveness.find(*PI);
+ assert(I != BlockLiveness.end() && "Predecessor not found");
+ LocalLiveIn |= I->second.LiveOut;
+ }
+
+ // Compute LiveOut by subtracting out lifetimes that end in this
+ // block, then adding in lifetimes that begin in this block. If
+ // we have both BEGIN and END markers in the same basic block
+ // then we know that the BEGIN marker comes after the END,
+ // because we already handle the case where the BEGIN comes
+ // before the END when collecting the markers (and building the
+ // BEGIN/END vectors).
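+      // Informally, the transfer function computed below is
+      //   LiveOut(BB) = (LiveIn(BB) - End(BB)) | Begin(BB)
+      // where LiveIn(BB) is the union of LiveOut over all predecessors of BB.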
+ BitVector LocalLiveOut = LocalLiveIn;
+ LocalLiveOut.reset(BlockInfo.End);
+ LocalLiveOut |= BlockInfo.Begin;
+
+ // Update block LiveIn set, noting whether it has changed.
+ if (LocalLiveIn.test(BlockInfo.LiveIn)) {
+ changed = true;
+ BlockInfo.LiveIn |= LocalLiveIn;
+ }
+
+ // Update block LiveOut set, noting whether it has changed.
+ if (LocalLiveOut.test(BlockInfo.LiveOut)) {
+ changed = true;
+ BlockInfo.LiveOut |= LocalLiveOut;
+ }
+ }
+ }// while changed.
+
+ NumIterations = NumIters;
+}
+
+void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
+ SmallVector<SlotIndex, 16> Starts;
+ SmallVector<SlotIndex, 16> Finishes;
+
+ // For each block, find which slots are active within this block
+ // and update the live intervals.
+ for (const MachineBasicBlock &MBB : *MF) {
+ Starts.clear();
+ Starts.resize(NumSlots);
+ Finishes.clear();
+ Finishes.resize(NumSlots);
+
+ // Create the interval for the basic blocks containing lifetime begin/end.
+ for (const MachineInstr &MI : MBB) {
+
+ SmallVector<int, 4> slots;
+ bool IsStart = false;
+ if (!isLifetimeStartOrEnd(MI, slots, IsStart))
+ continue;
+ SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
+ for (auto Slot : slots) {
+ if (IsStart) {
+ if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+ Starts[Slot] = ThisIndex;
+ } else {
+ if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
+ Finishes[Slot] = ThisIndex;
+ }
+ }
+ }
+
+ // Create the interval of the blocks that we previously found to be 'alive'.
+ BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+ for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+ pos = MBBLiveness.LiveIn.find_next(pos)) {
+ Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+ }
+ for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
+ pos = MBBLiveness.LiveOut.find_next(pos)) {
+ Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
+ }
+
+ for (unsigned i = 0; i < NumSlots; ++i) {
+ //
+ // When LifetimeStartOnFirstUse is turned on, data flow analysis
+ // is forward (from starts to ends), not bidirectional. A
+ // consequence of this is that we can wind up in situations
+ // where Starts[i] is invalid but Finishes[i] is valid and vice
+ // versa. Example:
+ //
+ // LIFETIME_START x
+ // if (...) {
+ // <use of x>
+ // throw ...;
+ // }
+ // LIFETIME_END x
+ // return 2;
+ //
+ //
+ // Here the slot for "x" will not be live into the block
+ // containing the "return 2" (since lifetimes start with first
+ // use, not at the dominating LIFETIME_START marker).
+ //
+ if (Starts[i].isValid() && !Finishes[i].isValid()) {
+ Finishes[i] = Indexes->getMBBEndIdx(&MBB);
+ }
+ if (!Starts[i].isValid())
+ continue;
+
+ assert(Starts[i] && Finishes[i] && "Invalid interval");
+ VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
+ SlotIndex S = Starts[i];
+ SlotIndex F = Finishes[i];
+ if (S < F) {
+ // We have a single consecutive region.
+ Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
+ } else {
+ // We have two non-consecutive regions. This happens when
+ // LIFETIME_START appears after the LIFETIME_END marker.
+ SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
+ SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
+ Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
+ Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
+ }
+ }
+ }
+}
+
+bool StackColoring::removeAllMarkers() {
+ unsigned Count = 0;
+ for (MachineInstr *MI : Markers) {
+ MI->eraseFromParent();
+ Count++;
+ }
+ Markers.clear();
+
+ DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n");
+ return Count;
+}
+
+void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
+ unsigned FixedInstr = 0;
+ unsigned FixedMemOp = 0;
+ unsigned FixedDbg = 0;
+ MachineModuleInfo *MMI = &MF->getMMI();
+
+ // Remap debug information that refers to stack slots.
+ for (auto &VI : MMI->getVariableDbgInfo()) {
+ if (!VI.Var)
+ continue;
+ if (SlotRemap.count(VI.Slot)) {
+ DEBUG(dbgs() << "Remapping debug info for ["
+ << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
+ VI.Slot = SlotRemap[VI.Slot];
+ FixedDbg++;
+ }
+ }
+
+ // Keep a list of *allocas* which need to be remapped.
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
+ for (const std::pair<int, int> &SI : SlotRemap) {
+ const AllocaInst *From = MFI->getObjectAllocation(SI.first);
+ const AllocaInst *To = MFI->getObjectAllocation(SI.second);
+ assert(To && From && "Invalid allocation object");
+ Allocas[From] = To;
+
+ // AA might be used later for instruction scheduling, and we need it to be
+    // able to deduce the correct aliasing relationships between pointers
+ // derived from the alloca being remapped and the target of that remapping.
+ // The only safe way, without directly informing AA about the remapping
+ // somehow, is to directly update the IR to reflect the change being made
+ // here.
+ Instruction *Inst = const_cast<AllocaInst *>(To);
+ if (From->getType() != To->getType()) {
+ BitCastInst *Cast = new BitCastInst(Inst, From->getType());
+ Cast->insertAfter(Inst);
+ Inst = Cast;
+ }
+
+ // Allow the stack protector to adjust its value map to account for the
+ // upcoming replacement.
+ SP->adjustForColoring(From, To);
+
+ // The new alloca might not be valid in a llvm.dbg.declare for this
+ // variable, so undef out the use to make the verifier happy.
+ AllocaInst *FromAI = const_cast<AllocaInst *>(From);
+ if (FromAI->isUsedByMetadata())
+ ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType()));
+ for (auto &Use : FromAI->uses()) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
+ if (BCI->isUsedByMetadata())
+ ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType()));
+ }
+
+ // Note that this will not replace uses in MMOs (which we'll update below),
+ // or anywhere else (which is why we won't delete the original
+ // instruction).
+ FromAI->replaceAllUsesWith(Inst);
+ }
+
+ // Remap all instructions to the new stack slots.
+ for (MachineBasicBlock &BB : *MF)
+ for (MachineInstr &I : BB) {
+ // Skip lifetime markers. We'll remove them soon.
+ if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
+ I.getOpcode() == TargetOpcode::LIFETIME_END)
+ continue;
+
+ // Update the MachineMemOperand to use the new alloca.
+ for (MachineMemOperand *MMO : I.memoperands()) {
+ // FIXME: In order to enable the use of TBAA when using AA in CodeGen,
+ // we'll also need to update the TBAA nodes in MMOs with values
+ // derived from the merged allocas. When doing this, we'll need to use
+ // the same variant of GetUnderlyingObjects that is used by the
+ // instruction scheduler (that can look through ptrtoint/inttoptr
+ // pairs).
+
+ // We've replaced IR-level uses of the remapped allocas, so we only
+ // need to replace direct uses here.
+ const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
+ if (!AI)
+ continue;
+
+ if (!Allocas.count(AI))
+ continue;
+
+ MMO->setValue(Allocas[AI]);
+ FixedMemOp++;
+ }
+
+ // Update all of the machine instruction operands.
+ for (MachineOperand &MO : I.operands()) {
+ if (!MO.isFI())
+ continue;
+ int FromSlot = MO.getIndex();
+
+ // Don't touch arguments.
+ if (FromSlot<0)
+ continue;
+
+ // Only look at mapped slots.
+ if (!SlotRemap.count(FromSlot))
+ continue;
+
+ // In a debug build, check that the instruction that we are modifying is
+ // inside the expected live range. If the instruction is not inside
+ // the calculated range then it means that the alloca usage moved
+ // outside of the lifetime markers, or that the user has a bug.
+ // NOTE: Alloca address calculations which happen outside the lifetime
+        // zone are okay, despite the fact that we don't have a good way
+ // for validating all of the usages of the calculation.
+#ifndef NDEBUG
+ bool TouchesMemory = I.mayLoad() || I.mayStore();
+ // If we *don't* protect the user from escaped allocas, don't bother
+ // validating the instructions.
+ if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ const LiveInterval *Interval = &*Intervals[FromSlot];
+ assert(Interval->find(Index) != Interval->end() &&
+ "Found instruction usage outside of live range.");
+ }
+#endif
+
+ // Fix the machine instructions.
+ int ToSlot = SlotRemap[FromSlot];
+ MO.setIndex(ToSlot);
+ FixedInstr++;
+ }
+ }
+
+ // Update the location of C++ catch objects for the MSVC personality routine.
+ if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
+ for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
+ for (WinEHHandlerType &H : TBME.HandlerArray)
+ if (H.CatchObj.FrameIndex != INT_MAX &&
+ SlotRemap.count(H.CatchObj.FrameIndex))
+ H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
+
+ DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
+ DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
+}
+
+void StackColoring::removeInvalidSlotRanges() {
+ for (MachineBasicBlock &BB : *MF)
+ for (MachineInstr &I : BB) {
+ if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
+ I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue())
+ continue;
+
+ // Some intervals are suspicious! In some cases we find address
+      // calculations outside of the lifetime zone, but no actual memory
+ // read or write. Memory accesses outside of the lifetime zone are a clear
+ // violation, but address calculations are okay. This can happen when
+ // GEPs are hoisted outside of the lifetime zone.
+ // So, in here we only check instructions which can read or write memory.
+ if (!I.mayLoad() && !I.mayStore())
+ continue;
+
+ // Check all of the machine operands.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isFI())
+ continue;
+
+ int Slot = MO.getIndex();
+
+ if (Slot<0)
+ continue;
+
+ if (Intervals[Slot]->empty())
+ continue;
+
+ // Check that the used slot is inside the calculated lifetime range.
+ // If it is not, warn about it and invalidate the range.
+ LiveInterval *Interval = &*Intervals[Slot];
+ SlotIndex Index = Indexes->getInstructionIndex(I);
+ if (Interval->find(Index) == Interval->end()) {
+ Interval->clear();
+ DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
+ EscapedAllocas++;
+ }
+ }
+ }
+}
+
+void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
+ unsigned NumSlots) {
+ // Expunge slot remap map.
+ for (unsigned i=0; i < NumSlots; ++i) {
+ // If we are remapping i
+ if (SlotRemap.count(i)) {
+ int Target = SlotRemap[i];
+ // As long as our target is mapped to something else, follow it.
+ while (SlotRemap.count(Target)) {
+ Target = SlotRemap[Target];
+ SlotRemap[i] = Target;
+ }
+ }
+ }
+}
+
+bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+ DEBUG(dbgs() << "********** Stack Coloring **********\n"
+ << "********** Function: "
+ << ((const Value*)Func.getFunction())->getName() << '\n');
+ MF = &Func;
+ MFI = MF->getFrameInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ SP = &getAnalysis<StackProtector>();
+ BlockLiveness.clear();
+ BasicBlocks.clear();
+ BasicBlockNumbering.clear();
+ Markers.clear();
+ Intervals.clear();
+ VNInfoAllocator.Reset();
+
+ unsigned NumSlots = MFI->getObjectIndexEnd();
+
+ // If there are no stack slots then there are no markers to remove.
+ if (!NumSlots)
+ return false;
+
+ SmallVector<int, 8> SortedSlots;
+ SortedSlots.reserve(NumSlots);
+ Intervals.reserve(NumSlots);
+
+ unsigned NumMarkers = collectMarkers(NumSlots);
+
+ unsigned TotalSize = 0;
+ DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n");
+ DEBUG(dbgs()<<"Slot structure:\n");
+
+ for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
+ DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n");
+ TotalSize += MFI->getObjectSize(i);
+ }
+
+ DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n");
+
+  // Don't continue if there are not enough lifetime markers, the stack is
+  // too small, or we were told not to optimize the slots.
+ if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
+ skipFunction(*Func.getFunction())) {
+ DEBUG(dbgs()<<"Will not try to merge slots.\n");
+ return removeAllMarkers();
+ }
+
+ for (unsigned i=0; i < NumSlots; ++i) {
+ std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
+ LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+ Intervals.push_back(std::move(LI));
+ SortedSlots.push_back(i);
+ }
+
+ // Calculate the liveness of each block.
+ calculateLocalLiveness();
+ DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
+ DEBUG(dump());
+
+ // Propagate the liveness information.
+ calculateLiveIntervals(NumSlots);
+ DEBUG(dumpIntervals());
+
+ // Search for allocas which are used outside of the declared lifetime
+ // markers.
+ if (ProtectFromEscapedAllocas)
+ removeInvalidSlotRanges();
+
+ // Maps old slots to new slots.
+ DenseMap<int, int> SlotRemap;
+ unsigned RemovedSlots = 0;
+ unsigned ReducedSize = 0;
+
+ // Do not bother looking at empty intervals.
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (Intervals[SortedSlots[I]]->empty())
+ SortedSlots[I] = -1;
+ }
+
+ // This is a simple greedy algorithm for merging allocas. First, sort the
+ // slots, placing the largest slots first. Next, perform an n^2 scan and look
+  // for disjoint slots. When you find disjoint slots, merge the smaller one
+ // into the bigger one and update the live interval. Remove the small alloca
+ // and continue.
+
+ // Sort the slots according to their size. Place unused slots at the end.
+ // Use stable sort to guarantee deterministic code generation.
+ std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
+ [this](int LHS, int RHS) {
+    // We use -1 to denote an uninteresting slot. Place these slots at the end.
+ if (LHS == -1) return false;
+ if (RHS == -1) return true;
+ // Sort according to size.
+ return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+ });
+
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (unsigned I = 0; I < NumSlots; ++I) {
+ if (SortedSlots[I] == -1)
+ continue;
+
+ for (unsigned J=I+1; J < NumSlots; ++J) {
+ if (SortedSlots[J] == -1)
+ continue;
+
+ int FirstSlot = SortedSlots[I];
+ int SecondSlot = SortedSlots[J];
+ LiveInterval *First = &*Intervals[FirstSlot];
+ LiveInterval *Second = &*Intervals[SecondSlot];
+ assert (!First->empty() && !Second->empty() && "Found an empty range");
+
+ // Merge disjoint slots.
+ if (!First->overlaps(*Second)) {
+ Changed = true;
+ First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+ SlotRemap[SecondSlot] = FirstSlot;
+ SortedSlots[J] = -1;
+ DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
+ SecondSlot<<" together.\n");
+ unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot),
+ MFI->getObjectAlignment(SecondSlot));
+
+ assert(MFI->getObjectSize(FirstSlot) >=
+ MFI->getObjectSize(SecondSlot) &&
+ "Merging a small object into a larger one");
+
+ RemovedSlots+=1;
+ ReducedSize += MFI->getObjectSize(SecondSlot);
+ MFI->setObjectAlignment(FirstSlot, MaxAlignment);
+ MFI->RemoveStackObject(SecondSlot);
+ }
+ }
+ }
+ }// While changed.
+
+ // Record statistics.
+ StackSpaceSaved += ReducedSize;
+ StackSlotMerged += RemovedSlots;
+ DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<<
+ ReducedSize<<" bytes\n");
+
+ // Scan the entire function and update all machine operands that use frame
+ // indices to use the remapped frame index.
+ expungeSlotMap(SlotRemap, NumSlots);
+ remapInstructions(SlotRemap);
+
+ return removeAllMarkers();
+}
diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
new file mode 100644
index 000000000000..87e4eb66c9c9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -0,0 +1,172 @@
+//===-- StackMapLivenessAnalysis.cpp - StackMap Live Out Analysis ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StackMap Liveness analysis pass. The pass calculates
+// the liveness for each basic block in a function and attaches the register
+// live-out information to a stackmap or patchpoint intrinsic if present.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stackmaps"
+
+static cl::opt<bool> EnablePatchPointLiveness(
+ "enable-patchpoint-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+
+STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
+STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
+STATISTIC(NumBBsVisited, "Number of basic blocks visited");
+STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
+STATISTIC(NumStackMaps, "Number of StackMaps visited");
+
+namespace {
+/// \brief This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a patchpoint
+/// intrinsic if present.
+///
+/// This pass can be disabled via the -enable-patchpoint-liveness=false flag.
+/// The pass skips functions that don't have any patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsic in order to function.
+class StackMapLiveness : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
+
+public:
+ static char ID;
+
+ /// \brief Default construct and initialize the pass.
+ StackMapLiveness();
+
+ /// \brief Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+
+ /// \brief Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Performs the actual liveness calculation for the function.
+ bool calculateLiveness(MachineFunction &MF);
+
+ /// \brief Add the current register live set to the instruction.
+ void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI);
+
+ /// \brief Create a register mask and initialize it with the registers from
+ /// the register live set.
+ uint32_t *createRegisterMask(MachineFunction &MF) const;
+};
+} // namespace
+
+char StackMapLiveness::ID = 0;
+char &llvm::StackMapLivenessID = StackMapLiveness::ID;
+INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
+ "StackMap Liveness Analysis", false, false)
+
+/// Default construct and initialize the pass.
+StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) {
+ initializeStackMapLivenessPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
+ // We preserve all information.
+ AU.setPreservesAll();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Calculate the liveness information for the given machine function.
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
+ if (!EnablePatchPointLiveness)
+ return false;
+
+ DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName()
+ << " **********\n");
+ TRI = MF.getSubtarget().getRegisterInfo();
+ ++NumStackMapFuncVisited;
+
+ // Skip this function if there are no patchpoints to process.
+ if (!MF.getFrameInfo()->hasPatchPoint()) {
+ ++NumStackMapFuncSkipped;
+ return false;
+ }
+ return calculateLiveness(MF);
+}
+
+/// Performs the actual liveness calculation for the function.
+bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
+ bool HasChanged = false;
+ // For all basic blocks in the function.
+ for (auto &MBB : MF) {
+ DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
+ LiveRegs.init(TRI);
+ // FIXME: This should probably be addLiveOuts().
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ bool HasStackMap = false;
+ // Reverse iterate over all instructions and add the current live register
+ // set to an instruction if we encounter a patchpoint instruction.
+ for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
+ if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
+ addLiveOutSetToMI(MF, *I);
+ HasChanged = true;
+ HasStackMap = true;
+ ++NumStackMaps;
+ }
+ DEBUG(dbgs() << " " << LiveRegs << " " << *I);
+ LiveRegs.stepBackward(*I);
+ }
+ ++NumBBsVisited;
+ if (!HasStackMap)
+ ++NumBBsHaveNoStackmap;
+ }
+ return HasChanged;
+}
+
+/// Add the current register live set to the instruction.
+void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF,
+ MachineInstr &MI) {
+ uint32_t *Mask = createRegisterMask(MF);
+ MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask);
+ MI.addOperand(MF, MO);
+}
+
+/// Create a register mask and initialize it with the registers from the
+/// register live set.
+uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const {
+ // The mask is owned and cleaned up by the Machine Function.
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ for (auto Reg : LiveRegs)
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+
+ // Give the target a chance to adjust the mask.
+ TRI->adjustStackMapLiveOutMask(Mask);
+
+ return Mask;
+}
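The mask built by createRegisterMask packs one bit per physical register into 32-bit words. A short standalone sketch of that encoding, together with the decoding loop used later by StackMaps::parseRegisterLiveOutMask; the register numbers and register count below are made up for illustration.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumRegs = 96;                     // hypothetical target
  std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);

  // Mark a few registers live-out, exactly as the pass fills its mask.
  const unsigned LiveRegs[] = {3, 35, 64};
  for (unsigned Reg : LiveRegs)
    Mask[Reg / 32] |= 1U << (Reg % 32);

  // Decoding mirrors the bit test in parseRegisterLiveOutMask.
  for (unsigned Reg = 0; Reg != NumRegs; ++Reg)
    if ((Mask[Reg / 32] >> (Reg % 32)) & 1)
      std::printf("register %u is live-out\n", Reg);
}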
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
new file mode 100644
index 000000000000..d91bb8066aed
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -0,0 +1,552 @@
+//===---------------------------- StackMaps.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <iterator>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stackmaps"
+
+static cl::opt<int> StackMapVersion(
+ "stackmap-version", cl::init(1),
+ cl::desc("Specify the stackmap encoding version (default = 1)"));
+
+const char *StackMaps::WSMP = "Stack Maps: ";
+
+PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
+ : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ !MI->getOperand(0).isImplicit()),
+ IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() ==
+ CallingConv::AnyReg) {
+#ifndef NDEBUG
+ unsigned CheckStartIdx = 0, e = MI->getNumOperands();
+ while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
+ MI->getOperand(CheckStartIdx).isDef() &&
+ !MI->getOperand(CheckStartIdx).isImplicit())
+ ++CheckStartIdx;
+
+ assert(getMetaIdx() == CheckStartIdx &&
+ "Unexpected additional definition in Patchpoint intrinsic.");
+#endif
+}
+
+unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
+ if (!StartIdx)
+ StartIdx = getVarIdx();
+
+ // Find the next scratch register (implicit def and early clobber)
+ unsigned ScratchIdx = StartIdx, e = MI->getNumOperands();
+ while (ScratchIdx < e &&
+ !(MI->getOperand(ScratchIdx).isReg() &&
+ MI->getOperand(ScratchIdx).isDef() &&
+ MI->getOperand(ScratchIdx).isImplicit() &&
+ MI->getOperand(ScratchIdx).isEarlyClobber()))
+ ++ScratchIdx;
+
+ assert(ScratchIdx != e && "No scratch register available");
+ return ScratchIdx;
+}
+
+StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
+ if (StackMapVersion != 1)
+ llvm_unreachable("Unsupported stackmap version!");
+}
+
+/// Go up the super-register chain until we hit a valid dwarf register number.
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
+ int RegNum = TRI->getDwarfRegNum(Reg, false);
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR)
+ RegNum = TRI->getDwarfRegNum(*SR, false);
+
+ assert(RegNum >= 0 && "Invalid Dwarf register number.");
+ return (unsigned)RegNum;
+}
+
+MachineInstr::const_mop_iterator
+StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE, LocationVec &Locs,
+ LiveOutVec &LiveOuts) const {
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
+ if (MOI->isImm()) {
+ switch (MOI->getImm()) {
+ default:
+ llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp: {
+ auto &DL = AP.MF->getDataLayout();
+
+ unsigned Size = DL.getPointerSizeInBits();
+ assert((Size % 8) == 0 && "Need pointer size in bytes.");
+ Size /= 8;
+ unsigned Reg = (++MOI)->getReg();
+ int64_t Imm = (++MOI)->getImm();
+ Locs.emplace_back(StackMaps::Location::Direct, Size,
+ getDwarfRegNum(Reg, TRI), Imm);
+ break;
+ }
+ case StackMaps::IndirectMemRefOp: {
+ int64_t Size = (++MOI)->getImm();
+ assert(Size > 0 && "Need a valid size for indirect memory locations.");
+ unsigned Reg = (++MOI)->getReg();
+ int64_t Imm = (++MOI)->getImm();
+ Locs.emplace_back(StackMaps::Location::Indirect, Size,
+ getDwarfRegNum(Reg, TRI), Imm);
+ break;
+ }
+ case StackMaps::ConstantOp: {
+ ++MOI;
+ assert(MOI->isImm() && "Expected constant operand.");
+ int64_t Imm = MOI->getImm();
+ Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, Imm);
+ break;
+ }
+ }
+ return ++MOI;
+ }
+
+ // The physical register number will ultimately be encoded as a DWARF regno.
+ // The stack map also records the size of a spill slot that can hold the
+ // register content. (The runtime can track the actual size of the data type
+ // if it needs to.)
+ if (MOI->isReg()) {
+ // Skip implicit registers (this includes our scratch registers)
+ if (MOI->isImplicit())
+ return ++MOI;
+
+ assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
+ "Virtreg operands should have been rewritten before now.");
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
+ assert(!MOI->getSubReg() && "Physical subreg still around.");
+
+ unsigned Offset = 0;
+ unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI);
+ unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false);
+ unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg());
+ if (SubRegIdx)
+ Offset = TRI->getSubRegIdxOffset(SubRegIdx);
+
+ Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset);
+ return ++MOI;
+ }
+
+ if (MOI->isRegLiveOut())
+ LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut());
+
+ return ++MOI;
+}
+
+void StackMaps::print(raw_ostream &OS) {
+ const TargetRegisterInfo *TRI =
+ AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr;
+ OS << WSMP << "callsites:\n";
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
+
+ OS << WSMP << "callsite " << CSI.ID << "\n";
+ OS << WSMP << " has " << CSLocs.size() << " locations\n";
+
+ unsigned Idx = 0;
+ for (const auto &Loc : CSLocs) {
+ OS << WSMP << "\t\tLoc " << Idx << ": ";
+ switch (Loc.Type) {
+ case Location::Unprocessed:
+ OS << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ OS << "Register ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ break;
+ case Location::Direct:
+ OS << "Direct ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ if (Loc.Offset)
+ OS << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ OS << "Indirect ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ OS << "+" << Loc.Offset;
+ break;
+ case Location::Constant:
+ OS << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ OS << "Constant Index " << Loc.Offset;
+ break;
+ }
+ OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size
+ << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
+ Idx++;
+ }
+
+ OS << WSMP << "\thas " << LiveOuts.size() << " live-out registers\n";
+
+ Idx = 0;
+ for (const auto &LO : LiveOuts) {
+ OS << WSMP << "\t\tLO " << Idx << ": ";
+ if (TRI)
+ OS << TRI->getName(LO.Reg);
+ else
+ OS << LO.Reg;
+ OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte "
+ << LO.Size << "]\n";
+ Idx++;
+ }
+ }
+}
+
+/// Create a live-out register record for the given register Reg.
+StackMaps::LiveOutReg
+StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
+ unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI);
+ unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ return LiveOutReg(Reg, DwarfRegNum, Size);
+}
+
+/// Parse the register live-out mask and return a vector of live-out registers
+/// that need to be recorded in the stackmap.
+StackMaps::LiveOutVec
+StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
+ assert(Mask && "No register mask specified");
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
+ LiveOutVec LiveOuts;
+
+ // Create a LiveOutReg for each bit that is set in the register mask.
+ for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg)
+ if ((Mask[Reg / 32] >> Reg % 32) & 1)
+ LiveOuts.push_back(createLiveOutReg(Reg, TRI));
+
+ // We don't need to keep track of a register if its super-register is already
+ // in the list. Merge entries that refer to the same dwarf register and use
+ // the maximum size that needs to be spilled.
+
+ std::sort(LiveOuts.begin(), LiveOuts.end(),
+ [](const LiveOutReg &LHS, const LiveOutReg &RHS) {
+ // Only sort by the dwarf register number.
+ return LHS.DwarfRegNum < RHS.DwarfRegNum;
+ });
+
+ for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) {
+ for (auto II = std::next(I); II != E; ++II) {
+ if (I->DwarfRegNum != II->DwarfRegNum) {
+ // Skip all the now invalid entries.
+ I = --II;
+ break;
+ }
+ I->Size = std::max(I->Size, II->Size);
+ if (TRI->isSuperRegister(I->Reg, II->Reg))
+ I->Reg = II->Reg;
+ II->Reg = 0; // mark for deletion.
+ }
+ }
+
+ LiveOuts.erase(
+ std::remove_if(LiveOuts.begin(), LiveOuts.end(),
+ [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ LiveOuts.end());
+
+ return LiveOuts;
+}
+
+void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ bool recordResult) {
+
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCSymbol *MILabel = OutContext.createTempSymbol();
+ AP.OutStreamer->EmitLabel(MILabel);
+
+ LocationVec Locations;
+ LiveOutVec LiveOuts;
+
+ if (recordResult) {
+ assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value.");
+ parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), Locations,
+ LiveOuts);
+ }
+
+ // Parse operands.
+ while (MOI != MOE) {
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+ }
+
+ // Move large constants into the constant pool.
+ for (auto &Loc : Locations) {
+ // Constants are encoded as sign-extended integers.
+ // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool.
+ if (Loc.Type == Location::Constant && !isInt<32>(Loc.Offset)) {
+ Loc.Type = Location::ConstantIndex;
+ // ConstPool is intentionally a MapVector of 'uint64_t's (as
+ // opposed to 'int64_t's). We should never be in a situation
+ // where we have to insert either the tombstone or the empty
+ // keys into a map, and for a DenseMap<uint64_t, T> these are
+ // (uint64_t)0 and (uint64_t)-1. They can be and are
+ // represented using 32 bit integers.
+ assert((uint64_t)Loc.Offset != DenseMapInfo<uint64_t>::getEmptyKey() &&
+ (uint64_t)Loc.Offset !=
+ DenseMapInfo<uint64_t>::getTombstoneKey() &&
+ "empty and tombstone keys should fit in 32 bits!");
+ auto Result = ConstPool.insert(std::make_pair(Loc.Offset, Loc.Offset));
+ Loc.Offset = Result.first - ConstPool.begin();
+ }
+ }
+
+ // Create an expression to calculate the offset of the callsite from function
+ // entry.
+ const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(MILabel, OutContext),
+ MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
+
+ CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
+ std::move(LiveOuts));
+
+ // Record the stack size of the current function.
+ const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
+ const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
+ bool HasDynamicFrameSize =
+ MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
+ FnStackSize[AP.CurrentFnSym] =
+ HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
+}
+
+void StackMaps::recordStackMap(const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
+
+ int64_t ID = MI.getOperand(0).getImm();
+ recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2),
+ MI.operands_end());
+}
+
+void StackMaps::recordPatchPoint(const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
+
+ PatchPointOpers opers(&MI);
+ int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
+
+ auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
+ recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
+ opers.isAnyReg() && opers.hasDef());
+
+#ifndef NDEBUG
+ // verify anyregcc
+ auto &Locations = CSInfos.back().Locations;
+ if (opers.isAnyReg()) {
+ unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
+ for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i)
+ assert(Locations[i].Type == Location::Register &&
+ "anyreg arg must be in reg.");
+ }
+#endif
+}
+
+void StackMaps::recordStatepoint(const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
+
+ StatepointOpers opers(&MI);
+ // Record all the deopt and gc operands (they're contiguous and run from the
+ // initial index to the end of the operand list)
+ const unsigned StartIdx = opers.getVarIdx();
+ recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx,
+ MI.operands_end(), false);
+}
+
+/// Emit the stackmap header.
+///
+/// Header {
+/// uint8 : Stack Map Version (currently 1)
+/// uint8 : Reserved (expected to be 0)
+/// uint16 : Reserved (expected to be 0)
+/// }
+/// uint32 : NumFunctions
+/// uint32 : NumConstants
+/// uint32 : NumRecords
+void StackMaps::emitStackmapHeader(MCStreamer &OS) {
+ // Header.
+ OS.EmitIntValue(StackMapVersion, 1); // Version.
+ OS.EmitIntValue(0, 1); // Reserved.
+ OS.EmitIntValue(0, 2); // Reserved.
+
+ // Num functions.
+ DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
+ OS.EmitIntValue(FnStackSize.size(), 4);
+ // Num constants.
+ DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
+ OS.EmitIntValue(ConstPool.size(), 4);
+ // Num callsites.
+ DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
+ OS.EmitIntValue(CSInfos.size(), 4);
+}
+
+/// Emit the function frame record for each function.
+///
+/// StkSizeRecord[NumFunctions] {
+/// uint64 : Function Address
+/// uint64 : Stack Size
+/// }
+void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
+ // Function Frame records.
+ DEBUG(dbgs() << WSMP << "functions:\n");
+ for (auto const &FR : FnStackSize) {
+ DEBUG(dbgs() << WSMP << "function addr: " << FR.first
+ << " frame size: " << FR.second);
+ OS.EmitSymbolValue(FR.first, 8);
+ OS.EmitIntValue(FR.second, 8);
+ }
+}
+
+/// Emit the constant pool.
+///
+/// int64 : Constants[NumConstants]
+void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
+ // Constant pool entries.
+ DEBUG(dbgs() << WSMP << "constants:\n");
+ for (const auto &ConstEntry : ConstPool) {
+ DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
+ OS.EmitIntValue(ConstEntry.second, 8);
+ }
+}
+
+/// Emit the callsite info for each callsite.
+///
+/// StkMapRecord[NumRecords] {
+/// uint64 : PatchPoint ID
+/// uint32 : Instruction Offset
+/// uint16 : Reserved (record flags)
+/// uint16 : NumLocations
+/// Location[NumLocations] {
+/// uint8 : Register | Direct | Indirect | Constant | ConstantIndex
+/// uint8 : Size in Bytes
+/// uint16 : Dwarf RegNum
+/// int32 : Offset
+/// }
+/// uint16 : Padding
+/// uint16 : NumLiveOuts
+/// LiveOuts[NumLiveOuts] {
+/// uint16 : Dwarf RegNum
+/// uint8 : Reserved
+/// uint8 : Size in Bytes
+/// }
+///   uint32 : Padding (only if required to align to 8 bytes)
+/// }
+///
+/// Location Encoding, Type, Value:
+/// 0x1, Register, Reg (value in register)
+/// 0x2, Direct, Reg + Offset (frame index)
+/// 0x3, Indirect, [Reg + Offset] (spilled value)
+/// 0x4, Constant, Offset (small constant)
+/// 0x5, ConstIndex, Constants[Offset] (large constant)
+void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
+ DEBUG(print(dbgs()));
+ // Callsite entries.
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
+
+ // Verify stack map entry. It's better to communicate a problem to the
+ // runtime than crash in case of in-process compilation. Currently, we do
+ // simple overflow checks, but we may eventually communicate other
+ // compilation errors this way.
+ if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) {
+ OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID.
+ OS.EmitValue(CSI.CSOffsetExpr, 4);
+ OS.EmitIntValue(0, 2); // Reserved.
+ OS.EmitIntValue(0, 2); // 0 locations.
+ OS.EmitIntValue(0, 2); // padding.
+ OS.EmitIntValue(0, 2); // 0 live-out registers.
+ OS.EmitIntValue(0, 4); // padding.
+ continue;
+ }
+
+ OS.EmitIntValue(CSI.ID, 8);
+ OS.EmitValue(CSI.CSOffsetExpr, 4);
+
+ // Reserved for flags.
+ OS.EmitIntValue(0, 2);
+ OS.EmitIntValue(CSLocs.size(), 2);
+
+ for (const auto &Loc : CSLocs) {
+ OS.EmitIntValue(Loc.Type, 1);
+ OS.EmitIntValue(Loc.Size, 1);
+ OS.EmitIntValue(Loc.Reg, 2);
+ OS.EmitIntValue(Loc.Offset, 4);
+ }
+
+ // Num live-out registers and padding to align to 4 bytes.
+ OS.EmitIntValue(0, 2);
+ OS.EmitIntValue(LiveOuts.size(), 2);
+
+ for (const auto &LO : LiveOuts) {
+ OS.EmitIntValue(LO.DwarfRegNum, 2);
+ OS.EmitIntValue(0, 1);
+ OS.EmitIntValue(LO.Size, 1);
+ }
+ // Emit alignment to 8 bytes.
+ OS.EmitValueToAlignment(8);
+ }
+}
+
+/// Serialize the stackmap data.
+void StackMaps::serializeToStackMapSection() {
+ (void)WSMP;
+ // Bail out if there's no stack map data.
+ assert((!CSInfos.empty() || ConstPool.empty()) &&
+ "Expected empty constant pool too!");
+ assert((!CSInfos.empty() || FnStackSize.empty()) &&
+ "Expected empty function record too!");
+ if (CSInfos.empty())
+ return;
+
+ MCContext &OutContext = AP.OutStreamer->getContext();
+ MCStreamer &OS = *AP.OutStreamer;
+
+ // Create the section.
+ MCSection *StackMapSection =
+ OutContext.getObjectFileInfo()->getStackMapSection();
+ OS.SwitchSection(StackMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
+
+ // Serialize data.
+ DEBUG(dbgs() << "********** Stack Map Output **********\n");
+ emitStackmapHeader(OS);
+ emitFunctionFrameRecords(OS);
+ emitConstantPoolEntries(OS);
+ emitCallsiteEntries(OS);
+ OS.AddBlankLine();
+
+ // Clean up.
+ CSInfos.clear();
+ ConstPool.clear();
+}
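A hedged sketch of the large-constant handling in recordStackMapOpers: constant locations that do not fit in a signed 32-bit field are moved into a deduplicating constant pool and the location's offset is rewritten to the pool index. This uses a plain std::vector plus std::map in place of LLVM's MapVector; the Location struct and sample values are illustrative only.

#include <cstdint>
#include <cstdio>
#include <limits>
#include <map>
#include <vector>

struct Location {
  enum Kind { Constant, ConstantIndex } Type = Constant;
  int64_t Offset = 0;
};

int main() {
  std::vector<int64_t> ConstPool;          // emitted later as int64 entries
  std::map<int64_t, size_t> PoolIndex;     // dedup map: value -> pool slot

  std::vector<Location> Locs = {{Location::Constant, 42},
                                {Location::Constant, int64_t(1) << 40},
                                {Location::Constant, int64_t(1) << 40}};

  for (auto &Loc : Locs) {
    if (Loc.Type != Location::Constant)
      continue;
    // Small constants stay encoded inline; only >32-bit values hit the pool.
    if (Loc.Offset >= std::numeric_limits<int32_t>::min() &&
        Loc.Offset <= std::numeric_limits<int32_t>::max())
      continue;
    auto It = PoolIndex.find(Loc.Offset);
    if (It == PoolIndex.end()) {
      It = PoolIndex.emplace(Loc.Offset, ConstPool.size()).first;
      ConstPool.push_back(Loc.Offset);
    }
    Loc.Type = Location::ConstantIndex;
    Loc.Offset = int64_t(It->second);      // now an index into ConstPool
  }

  std::printf("pool size: %zu, loc1 index: %lld\n", ConstPool.size(),
              (long long)Locs[1].Offset);  // pool size: 1, loc1 index: 0
}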
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 000000000000..89868e43aba4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,471 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A variable
+// with a random value in it is stored onto the stack before the local variables
+// are allocated. Upon exiting the block, the stored value is checked. If it's
+// changed, then there was some sort of violation and the program aborts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-protector"
+
+STATISTIC(NumFunProtected, "Number of functions protected");
+STATISTIC(NumAddrTaken, "Number of local variables that have their address"
+ " taken.");
+
+static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
+ cl::init(true), cl::Hidden);
+
+char StackProtector::ID = 0;
+INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors",
+ false, true)
+
+FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
+ return new StackProtector(TM);
+}
+
+StackProtector::SSPLayoutKind
+StackProtector::getSSPLayout(const AllocaInst *AI) const {
+ return AI ? Layout.lookup(AI) : SSPLK_None;
+}
+
+void StackProtector::adjustForColoring(const AllocaInst *From,
+ const AllocaInst *To) {
+ // When coloring replaces one alloca with another, transfer the SSPLayoutKind
+ // tag from the remapped to the target alloca. The remapped alloca should
+ // have a size smaller than or equal to the replacement alloca.
+ SSPLayoutMap::iterator I = Layout.find(From);
+ if (I != Layout.end()) {
+ SSPLayoutKind Kind = I->second;
+ Layout.erase(I);
+
+ // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite
+ // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that
+ // SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
+ I = Layout.find(To);
+ if (I == Layout.end())
+ Layout.insert(std::make_pair(To, Kind));
+ else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf)
+ I->second = Kind;
+ }
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+ F = &Fn;
+ M = F->getParent();
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
+ HasPrologue = false;
+ HasIRCheck = false;
+
+ Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size");
+ if (Attr.isStringAttribute() &&
+ Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
+ return false; // Invalid integer string
+
+ if (!RequiresStackProtector())
+ return false;
+
+ // TODO(etienneb): Functions with funclets are not correctly supported now.
+ // Do nothing if this is funclet-based personality.
+ if (Fn.hasPersonalityFn()) {
+ EHPersonality Personality = classifyEHPersonality(Fn.getPersonalityFn());
+ if (isFuncletEHPersonality(Personality))
+ return false;
+ }
+
+ ++NumFunProtected;
+ return InsertStackProtectors();
+}
+
+/// \param [out] IsLarge is set to true if a protectable array is found and
+/// it is "large" ( >= ssp-buffer-size). In the case of a structure with
+/// multiple arrays, this gets set if any of them is large.
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
+ bool Strong,
+ bool InStruct) const {
+ if (!Ty)
+ return false;
+ if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ if (!AT->getElementType()->isIntegerTy(8)) {
+ // If we're on a non-Darwin platform or we're inside of a structure, don't
+ // add stack protectors unless the array is a character array.
+ // However, in strong mode any array, regardless of type and size,
+ // triggers a protector.
+ if (!Strong && (InStruct || !Trip.isOSDarwin()))
+ return false;
+ }
+
+ // If an array has at least SSPBufferSize bytes of allocated space, then we
+ // emit stack protectors.
+ if (SSPBufferSize <= M->getDataLayout().getTypeAllocSize(AT)) {
+ IsLarge = true;
+ return true;
+ }
+
+ if (Strong)
+ // Require a protector for all arrays in strong mode
+ return true;
+ }
+
+ const StructType *ST = dyn_cast<StructType>(Ty);
+ if (!ST)
+ return false;
+
+ bool NeedsProtector = false;
+ for (StructType::element_iterator I = ST->element_begin(),
+ E = ST->element_end();
+ I != E; ++I)
+ if (ContainsProtectableArray(*I, IsLarge, Strong, true)) {
+ // If the element is a protectable array and is large (>= SSPBufferSize)
+ // then we are done. If the protectable array is not large, then
+ // keep looking in case a subsequent element is a large array.
+ if (IsLarge)
+ return true;
+ NeedsProtector = true;
+ }
+
+ return NeedsProtector;
+}
+
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (const User *U : AI->users()) {
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (AI == SI->getValueOperand())
+ return true;
+ } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
+ if (AI == SI->getOperand(0))
+ return true;
+ } else if (isa<CallInst>(U)) {
+ return true;
+ } else if (isa<InvokeInst>(U)) {
+ return true;
+ } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
+ if (HasAddressTaken(SI))
+ return true;
+ } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ if (VisitedPHIs.insert(PN).second)
+ if (HasAddressTaken(PN))
+ return true;
+ } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (HasAddressTaken(GEP))
+ return true;
+ } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
+ if (HasAddressTaken(BI))
+ return true;
+ }
+ }
+ return false;
+}
+
+/// \brief Check whether or not this function needs a stack protector based
+/// upon the stack protector level.
+///
+/// We use two heuristics: a standard (ssp) and strong (sspstrong).
+/// The standard heuristic adds a guard variable to functions that call alloca
+/// with either a variable size or a size >= SSPBufferSize, to functions with
+/// character buffers larger than SSPBufferSize, and to functions with
+/// aggregates containing character buffers larger than SSPBufferSize. The
+/// strong heuristic adds a guard variable to functions that call alloca
+/// regardless of size, to functions with any buffer regardless of type and
+/// size, to functions with aggregates that contain any buffer regardless of
+/// type and size, and to functions that contain stack-based variables that
+/// have had their address taken.
+bool StackProtector::RequiresStackProtector() {
+ bool Strong = false;
+ bool NeedsProtector = false;
+ for (const BasicBlock &BB : *F)
+ for (const Instruction &I : BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->getCalledFunction() ==
+ Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::stackprotector))
+ HasPrologue = true;
+
+ if (F->hasFnAttribute(Attribute::SafeStack))
+ return false;
+
+ if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ NeedsProtector = true;
+ Strong = true; // Use the same heuristic as strong to determine SSPLayout
+ } else if (F->hasFnAttribute(Attribute::StackProtectStrong))
+ Strong = true;
+ else if (HasPrologue)
+ NeedsProtector = true;
+ else if (!F->hasFnAttribute(Attribute::StackProtect))
+ return false;
+
+ for (const BasicBlock &BB : *F) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (AI->isArrayAllocation()) {
+ // SSP-Strong: Enable protectors for any call to alloca, regardless
+ // of size.
+ if (Strong)
+ return true;
+
+ if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+ if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
+ // A call to alloca with size >= SSPBufferSize requires
+ // stack protectors.
+ Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ NeedsProtector = true;
+ } else if (Strong) {
+ // Require protectors for all alloca calls in strong mode.
+ Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+ NeedsProtector = true;
+ }
+ } else {
+ // A call to alloca with a variable size requires protectors.
+ Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ NeedsProtector = true;
+ }
+ continue;
+ }
+
+ bool IsLarge = false;
+ if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
+ Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
+ : SSPLK_SmallArray));
+ NeedsProtector = true;
+ continue;
+ }
+
+ if (Strong && HasAddressTaken(AI)) {
+ ++NumAddrTaken;
+ Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+ NeedsProtector = true;
+ }
+ }
+ }
+ }
+
+ return NeedsProtector;
+}
+
+/// Create a stack guard loading and populate whether SelectionDAG SSP is
+/// supported.
+static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
+ IRBuilder<> &B,
+ bool *SupportsSelectionDAGSP = nullptr) {
+ if (Value *Guard = TLI->getIRStackGuard(B))
+ return B.CreateLoad(Guard, true, "StackGuard");
+
+ // Use SelectionDAG SSP handling, since there isn't an IR guard.
+ //
+ // This is somewhat awkward, since we optionally report here whether
+ // SelectionDAG-based stack protection should be performed. The reason is
+ // that the answer is strictly defined as !TLI->getIRStackGuard(B), and
+ // getIRStackGuard is also mutating. There is no way to get this bit without
+ // mutating the IR, so it has to be computed at exactly this point.
+ //
+ // We could have defined a new function TLI::supportsSelectionDAGSP(), but
+ // that would put more burden on the backends' overriding work, especially
+ // when it conveys the same information getIRStackGuard() already gives.
+ if (SupportsSelectionDAGSP)
+ *SupportsSelectionDAGSP = true;
+ TLI->insertSSPDeclarations(*M);
+ return B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard));
+}
+
+/// Insert code into the entry block that stores the stack guard
+/// variable onto the stack:
+///
+/// entry:
+/// StackGuardSlot = alloca i8*
+/// StackGuard = <stack guard>
+/// call void @llvm.stackprotector(StackGuard, StackGuardSlot)
+///
+/// Returns true if the platform/triple supports the stackprotectorcreate pseudo
+/// node.
+static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
+ const TargetLoweringBase *TLI, AllocaInst *&AI) {
+ bool SupportsSelectionDAGSP = false;
+ IRBuilder<> B(&F->getEntryBlock().front());
+ PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
+
+ Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
+ B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+ {GuardSlot, AI});
+ return SupportsSelectionDAGSP;
+}
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+/// - The prologue code loads and stores the stack guard onto the stack.
+/// - The epilogue checks the value stored in the prologue against the original
+/// value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+ bool SupportsSelectionDAGSP =
+ EnableSelectionDAGSP && !TM->Options.EnableFastISel;
+ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+
+ for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
+ BasicBlock *BB = &*I++;
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RI)
+ continue;
+
+ // Generate prologue instrumentation if not already generated.
+ if (!HasPrologue) {
+ HasPrologue = true;
+ SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
+ }
+
+ // SelectionDAG based code generation. Nothing else needs to be done here.
+ // The epilogue instrumentation is postponed to SelectionDAG.
+ if (SupportsSelectionDAGSP)
+ break;
+
+ // Set HasIRCheck to true, so that SelectionDAG will not generate its own
+ // version. SelectionDAG calls 'shouldEmitSDCheck' to check whether the
+ // instrumentation has already been generated.
+ HasIRCheck = true;
+
+ // Generate epilogue instrumentation. The epilogue instrumentation can be
+ // function-based or inlined depending on which mechanism the target is
+ // providing.
+ if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ // Generate the function-based epilogue instrumentation.
+ // The target provides a guard check function, generate a call to it.
+ IRBuilder<> B(RI);
+ LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+ CallInst *Call = B.CreateCall(GuardCheck, {Guard});
+ llvm::Function *Function = cast<llvm::Function>(GuardCheck);
+ Call->setAttributes(Function->getAttributes());
+ Call->setCallingConv(Function->getCallingConv());
+ } else {
+ // Generate the epilogue with inline instrumentation.
+ // If we do not support SelectionDAG based tail calls, generate IR level
+ // tail calls.
+ //
+ // For each block with a return instruction, convert this:
+ //
+ // return:
+ // ...
+ // ret ...
+ //
+ // into this:
+ //
+ // return:
+ // ...
+ // %1 = <stack guard>
+ // %2 = load StackGuardSlot
+ // %3 = cmp i1 %1, %2
+ // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+ //
+ // SP_return:
+ // ret ...
+ //
+ // CallStackCheckFailBlk:
+ // call void @__stack_chk_fail()
+ // unreachable
+
+ // Create the FailBB. We duplicate the BB every time since the MI tail
+ // merge pass will merge together all of the various BBs into one, including
+ // the fail BB generated by the stack protector pseudo instruction.
+ BasicBlock *FailBB = CreateFailBB();
+
+ // Split the basic block before the return instruction.
+ BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
+
+ // Update the dominator tree if we need to.
+ if (DT && DT->isReachableFromEntry(BB)) {
+ DT->addNewBlock(NewBB, BB);
+ DT->addNewBlock(FailBB, BB);
+ }
+
+ // Remove default branch instruction to the new BB.
+ BB->getTerminator()->eraseFromParent();
+
+ // Move the newly created basic block to the point right after the old
+ // basic block so that it's in the "fall through" position.
+ NewBB->moveAfter(BB);
+
+ // Generate the stack protector instructions in the old basic block.
+ IRBuilder<> B(BB);
+ Value *Guard = getStackGuard(TLI, M, B);
+ LoadInst *LI2 = B.CreateLoad(AI, true);
+ Value *Cmp = B.CreateICmpEQ(Guard, LI2);
+ auto SuccessProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(true);
+ auto FailureProb =
+ BranchProbabilityInfo::getBranchProbStackProtector(false);
+ MDNode *Weights = MDBuilder(F->getContext())
+ .createBranchWeights(SuccessProb.getNumerator(),
+ FailureProb.getNumerator());
+ B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
+ }
+ }
+
+ // Return if we didn't modify any basic blocks, i.e., there are no return
+ // statements in the function.
+ return HasPrologue;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+ LLVMContext &Context = F->getContext();
+ BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
+ IRBuilder<> B(FailBB);
+ B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
+ if (Trip.isOSOpenBSD()) {
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_smash_handler",
+ Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context), nullptr);
+
+ B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
+ } else {
+ Constant *StackChkFail =
+ M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
+ nullptr);
+ B.CreateCall(StackChkFail, {});
+ }
+ B.CreateUnreachable();
+ return FailBB;
+}
+
+bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
+ return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());
+}
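A condensed sketch of the ssp/sspstrong decision described in RequiresStackProtector, modelling allocas as plain records instead of LLVM IR. The Alloca struct and the 8-byte default buffer size are illustrative assumptions, and the Darwin special case and recursive aggregate scan are deliberately omitted.

#include <cstdio>
#include <vector>

struct Alloca {
  bool VariableSize;   // alloca with a non-constant element count
  unsigned Bytes;      // total allocated size for fixed-size allocas
  bool IsArray;        // array type (any element type)
  bool IsCharArray;    // i8 array (or aggregate containing one)
  bool AddressTaken;   // address escapes via store/ptrtoint/call/...
};

static bool requiresProtector(const std::vector<Alloca> &Allocas, bool Strong,
                              unsigned SSPBufferSize = 8) {
  for (const Alloca &AI : Allocas) {
    if (AI.VariableSize)
      return true;                               // variable-size alloca
    if (Strong && (AI.IsArray || AI.AddressTaken))
      return true;                               // sspstrong: any buffer or escape
    if (AI.IsCharArray && AI.Bytes >= SSPBufferSize)
      return true;                               // ssp: large character buffer
  }
  return false;
}

int main() {
  std::printf("%d\n", requiresProtector({{false, 4, true, true, false}}, false));  // 0
  std::printf("%d\n", requiresProtector({{false, 64, true, true, false}}, false)); // 1
  std::printf("%d\n", requiresProtector({{false, 4, true, false, true}}, true));   // 1
}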
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 000000000000..d996714a414a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,469 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "stackslotcoloring"
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+ cl::init(false), cl::Hidden,
+ cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+ class StackSlotColoring : public MachineFunctionPass {
+ LiveStacks* LS;
+ MachineFrameInfo *MFI;
+ const TargetInstrInfo *TII;
+ const MachineBlockFrequencyInfo *MBFI;
+
+ // SSIntervals - Spill slot intervals.
+ std::vector<LiveInterval*> SSIntervals;
+
+ // SSRefs - Keep a list of MachineMemOperands for each spill slot.
+ // MachineMemOperands can be shared between instructions, so we need
+ // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not
+ // become FI0 -> FI1 -> FI2.
+ SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
+
+ // OrigAlignments - Alignments of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigAlignments;
+
+ // OrigSizes - Sizes of stack objects before coloring.
+ SmallVector<unsigned, 16> OrigSizes;
+
+ // AllColors - If index is set, it's a spill slot, i.e. color.
+ // FIXME: This assumes PEI locates spill slots with smaller indices
+ // closest to the stack pointer / frame pointer. Therefore, smaller
+ // index == better color.
+ BitVector AllColors;
+
+ // NextColor - Next "color" that's not yet used.
+ int NextColor;
+
+ // UsedColors - "Colors" that have been assigned.
+ BitVector UsedColors;
+
+ // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+
+ public:
+ static char ID; // Pass identification
+ StackSlotColoring() :
+ MachineFunctionPass(ID), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveStacks>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ void InitializeSlots();
+ void ScanForSpillSlotRefs(MachineFunction &MF);
+ bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+ int ColorSlot(LiveInterval *li);
+ bool ColorSlots(MachineFunction &MF);
+ void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
+ MachineFunction &MF);
+ bool RemoveDeadStores(MachineBasicBlock* MBB);
+ };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+
+namespace {
+ // IntervalSorter - Comparison predicate that sorts live intervals by
+ // their weight.
+ struct IntervalSorter {
+ bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+ return LHS->weight > RHS->weight;
+ }
+ };
+}
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+ SSRefs.resize(MFI->getObjectIndexEnd());
+
+ // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+ MII != EE; ++MII) {
+ MachineInstr &MI = *MII;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int FI = MO.getIndex();
+ if (FI < 0)
+ continue;
+ if (!LS->hasInterval(FI))
+ continue;
+ LiveInterval &li = LS->getInterval(FI);
+ if (!MI.isDebugValue())
+ li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI);
+ }
+ for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(),
+ EE = MI.memoperands_end();
+ MMOI != EE; ++MMOI) {
+ MachineMemOperand *MMO = *MMOI;
+ if (const FixedStackPseudoSourceValue *FSV =
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MMO->getPseudoValue())) {
+ int FI = FSV->getFrameIndex();
+ if (FI >= 0)
+ SSRefs[FI].push_back(MMO);
+ }
+ }
+ }
+ }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+ int LastFI = MFI->getObjectIndexEnd();
+ OrigAlignments.resize(LastFI);
+ OrigSizes.resize(LastFI);
+ AllColors.resize(LastFI);
+ UsedColors.resize(LastFI);
+ Assignments.resize(LastFI);
+
+ typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair;
+ SmallVector<Pair *, 16> Intervals;
+ Intervals.reserve(LS->getNumIntervals());
+ for (auto &I : *LS)
+ Intervals.push_back(&I);
+ std::sort(Intervals.begin(), Intervals.end(),
+ [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
+
+ // Gather all spill slots into a list.
+ DEBUG(dbgs() << "Spill slot intervals:\n");
+ for (auto *I : Intervals) {
+ LiveInterval &li = I->second;
+ DEBUG(li.dump());
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ if (MFI->isDeadObjectIndex(FI))
+ continue;
+ SSIntervals.push_back(&li);
+ OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+ OrigSizes[FI] = MFI->getObjectSize(FI);
+ AllColors.set(FI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them by weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+ // Get first "color".
+ NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+ const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color];
+ for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+ LiveInterval *OtherLI = OtherLIs[i];
+ if (OtherLI->overlaps(*li))
+ return true;
+ }
+ return false;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+ int Color = -1;
+ bool Share = false;
+ if (!DisableSharing) {
+ // Check if it's possible to reuse any of the used colors.
+ Color = UsedColors.find_first();
+ while (Color != -1) {
+ if (!OverlapWithAssignments(li, Color)) {
+ Share = true;
+ ++NumEliminated;
+ break;
+ }
+ Color = UsedColors.find_next(Color);
+ }
+ }
+
+ // Assign it to the first available color (assumed to be the best) if it's
+ // not possible to share a used color with other objects.
+ if (!Share) {
+ assert(NextColor != -1 && "No more spill slots?");
+ Color = NextColor;
+ UsedColors.set(Color);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ // Record the assignment.
+ Assignments[Color].push_back(li);
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+ // Change size and alignment of the allocated slot. If there are multiple
+ // objects sharing the same slot, then make sure the size and alignment
+ // are large enough for all.
+ unsigned Align = OrigAlignments[FI];
+ if (!Share || Align > MFI->getObjectAlignment(Color))
+ MFI->setObjectAlignment(Color, Align);
+ int64_t Size = OrigSizes[FI];
+ if (!Share || Size > MFI->getObjectSize(Color))
+ MFI->setObjectSize(Color, Size);
+ return Color;
+}
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+ unsigned NumObjs = MFI->getObjectIndexEnd();
+ SmallVector<int, 16> SlotMapping(NumObjs, -1);
+ SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+ SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+ BitVector UsedColors(NumObjs);
+
+ DEBUG(dbgs() << "Color spill slot intervals:\n");
+ bool Changed = false;
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int NewSS = ColorSlot(li);
+ assert(NewSS >= 0 && "Stack coloring failed?");
+ SlotMapping[SS] = NewSS;
+ RevMap[NewSS].push_back(SS);
+ SlotWeights[NewSS] += li->weight;
+ UsedColors.set(NewSS);
+ Changed |= (SS != NewSS);
+ }
+
+ DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+ LiveInterval *li = SSIntervals[i];
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ li->weight = SlotWeights[SS];
+ }
+ // Sort them by new weight.
+ std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+ DEBUG(SSIntervals[i]->dump());
+ DEBUG(dbgs() << '\n');
+#endif
+
+ if (!Changed)
+ return false;
+
+ // Rewrite all MachineMemOperands.
+ for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+ int NewFI = SlotMapping[SS];
+ if (NewFI == -1 || (NewFI == (int)SS))
+ continue;
+
+ const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI);
+ SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
+ for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
+ RefMMOs[i]->setValue(NewSV);
+ }
+
+ // Rewrite all MO_FrameIndex operands. Look for dead stores.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB)
+ RewriteInstruction(MI, SlotMapping, MF);
+ RemoveDeadStores(&MBB);
+ }
+
+ // Delete unused stack slots.
+ while (NextColor != -1) {
+ DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+ MFI->RemoveStackObject(NextColor);
+ NextColor = AllColors.find_next(NextColor);
+ }
+
+ return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
+ SmallVectorImpl<int> &SlotMapping,
+ MachineFunction &MF) {
+ // Update the operands.
+ for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isFI())
+ continue;
+ int OldFI = MO.getIndex();
+ if (OldFI < 0)
+ continue;
+ int NewFI = SlotMapping[OldFI];
+ if (NewFI == -1 || NewFI == OldFI)
+ continue;
+ MO.setIndex(NewFI);
+ }
+
+ // The MachineMemOperands have already been updated.
+}
+
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+ // FIXME: This could be much more aggressive, but we need to investigate
+ // the compile time impact of doing so.
+ bool changed = false;
+
+ SmallVector<MachineInstr*, 4> toErase;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (DCELimit != -1 && (int)NumDead >= DCELimit)
+ break;
+
+ int FirstSS, SecondSS;
+ if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
+ FirstSS != -1) {
+ ++NumDead;
+ changed = true;
+ toErase.push_back(&*I);
+ continue;
+ }
+
+ MachineBasicBlock::iterator NextMI = std::next(I);
+ if (NextMI == MBB->end()) continue;
+
+ unsigned LoadReg = 0;
+ unsigned StoreReg = 0;
+ if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
+ continue;
+ if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
+ continue;
+ if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+ ++NumDead;
+ changed = true;
+
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
+ ++NumDead;
+ toErase.push_back(&*I);
+ }
+
+ toErase.push_back(&*NextMI);
+ ++I;
+ }
+
+ for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(),
+ E = toErase.end(); I != E; ++I)
+ (*I)->eraseFromParent();
+
+ return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ MFI = MF.getFrameInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ LS = &getAnalysis<LiveStacks>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ bool Changed = false;
+
+ unsigned NumSlots = LS->getNumIntervals();
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
+
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.exposesReturnsTwice())
+ return false;
+
+ // Gather spill slot references
+ ScanForSpillSlotRefs(MF);
+ InitializeSlots();
+ Changed = ColorSlots(MF);
+
+ NextColor = -1;
+ SSIntervals.clear();
+ for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+ SSRefs[i].clear();
+ SSRefs.clear();
+ OrigAlignments.clear();
+ OrigSizes.clear();
+ AllColors.clear();
+ UsedColors.clear();
+ for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+ Assignments[i].clear();
+ Assignments.clear();
+
+ return Changed;
+}
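A small, hedged sketch of the first-fit sharing strategy used by ColorSlot: reuse the lowest already-used colour whose assigned intervals are all disjoint from the candidate, otherwise open a new colour. The Interval type and sample segments stand in for LLVM's LiveInterval; weight-based ordering is assumed to have happened already.

#include <cstdio>
#include <utility>
#include <vector>

using Interval = std::vector<std::pair<int, int>>; // [start, end) segments

static bool overlaps(const Interval &A, const Interval &B) {
  for (auto &RA : A)
    for (auto &RB : B)
      if (RA.first < RB.second && RB.first < RA.second)
        return true;
  return false;
}

int main() {
  // Slot live intervals, already sorted by decreasing weight.
  std::vector<Interval> Slots = {{{0, 10}}, {{5, 15}}, {{12, 20}}, {{16, 30}}};

  std::vector<std::vector<int>> Assignments; // color -> slots sharing it
  std::vector<int> Color(Slots.size(), -1);

  for (int S = 0, E = (int)Slots.size(); S != E; ++S) {
    int C = -1;
    // First-fit: try every used color in order, like UsedColors.find_first/next.
    for (int U = 0, UE = (int)Assignments.size(); U != UE; ++U) {
      bool Clash = false;
      for (int Other : Assignments[U])
        if (overlaps(Slots[S], Slots[Other])) { Clash = true; break; }
      if (!Clash) { C = U; break; }
    }
    if (C == -1) {                       // no shareable color: open a new one
      C = (int)Assignments.size();
      Assignments.emplace_back();
    }
    Assignments[C].push_back(S);
    Color[S] = C;
    std::printf("fi#%d -> color %d\n", S, C); // prints 0, 1, 0, 1
  }
}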
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
new file mode 100644
index 000000000000..2b1fb127497a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -0,0 +1,65 @@
+//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass duplicates basic blocks ending in unconditional branches into
+// the tails of their predecessors, using the TailDuplicator utility class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "tailduplication"
+
+namespace {
+/// Perform tail duplication. Delegates to TailDuplicator.
+class TailDuplicatePass : public MachineFunctionPass {
+ TailDuplicator Duplicator;
+
+public:
+ static char ID;
+ explicit TailDuplicatePass() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+char TailDuplicatePass::ID = 0;
+}
+
+char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false,
+ false)
+
+bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ auto MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+ Duplicator.initMF(MF, MMI, MBPI);
+
+ bool MadeChange = false;
+ while (Duplicator.tailDuplicateBlocks(MF))
+ MadeChange = true;
+
+ return MadeChange;
+}
+
+void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
new file mode 100644
index 000000000000..847a09349a59
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -0,0 +1,932 @@
+//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This utility class duplicates basic blocks ending in unconditional branches
+// into the tails of their predecessors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "tailduplication"
+
+STATISTIC(NumTails, "Number of tails duplicated");
+STATISTIC(NumTailDups, "Number of tail duplicated blocks");
+STATISTIC(NumTailDupAdded,
+ "Number of instructions added due to tail duplication");
+STATISTIC(NumTailDupRemoved,
+ "Number of instructions removed due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumAddedPHIs, "Number of phis added");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDuplicateSize(
+ "tail-dup-size",
+ cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
+ cl::Hidden);
+
+static cl::opt<bool>
+ TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
+ cl::Hidden);
+
+namespace llvm {
+
+void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin,
+ const MachineBranchProbabilityInfo *MBPIin) {
+ TII = MF.getSubtarget().getInstrInfo();
+ TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ MMI = MMIin;
+ MBPI = MBPIin;
+
+ assert(MBPI != nullptr && "Machine Branch Probability Info required");
+
+ PreRegAlloc = MRI->isSSA();
+}
+
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (!MI->isPHI())
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(nullptr);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": "
+ << *MI;
+ dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber()
+ << '\n';
+ llvm_unreachable(nullptr);
+ }
+ if (PHIBB->getNumber() < 0) {
+ dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(nullptr);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
+/// Tail duplicate the block and clean up.
+bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
+ MachineBasicBlock *MBB) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock *, 8> TDBBs;
+ SmallVector<MachineInstr *, 16> Copies;
+ if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr *, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumTailDupRemoved += MBB->size();
+ removeDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = nullptr;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+ // FIXME: Should it be SSAUpdate's job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ if (MRI->hasOneNonDBGUse(Src) &&
+ MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
+
+/// Look for small blocks that are unconditionally branched to and do not fall
+/// through. Tail-duplicate their instructions into their predecessors to
+/// eliminate (dynamic) branches.
+bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) {
+ MachineBasicBlock *MBB = &*I++;
+
+ if (NumTails == TailDupLimit)
+ break;
+
+ bool IsSimple = isSimpleBB(MBB);
+
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
+
+ MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB);
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
+
+ return MadeChange;
+}
+
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+ if (UseMI.isDebugValue())
+ continue;
+ if (UseMI.getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i + 1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+// Remember which registers are used by phis in this block. This is
+// used to determine which registers are live out while modifying the
+// block (which is why we need to copy the information).
+static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
+ DenseSet<unsigned> *UsedByPhi) {
+ for (const auto &MI : BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ UsedByPhi->insert(SrcReg);
+ }
+ }
+}
+
+/// Add a (definition, source virtual register) pair for SSA update.
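+/// For example (register and block names are illustrative), if %vreg5 from
+/// the tail block was re-defined as %vreg9 when duplicating into PredBB1 and
+/// as %vreg12 when duplicating into PredBB2, then SSAUpdateVals[%vreg5] ends
+/// up holding {(PredBB1, %vreg9), (PredBB2, %vreg12)} and %vreg5 is queued in
+/// SSAUpdateVRs for the later MachineSSAUpdater pass over its uses.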
+void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
+/// source register that's contributed by PredBB and update the SSA update map.
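+/// Schematically (names are illustrative): for a PHI in TailBB such as
+///   %vreg2 = PHI %vreg7, <PredBB>, %vreg8, <OtherBB>
+/// this records %vreg2 -> %vreg7 in LocalVRMap, queues a COPY of %vreg7 into
+/// a fresh register at the end of PredBB (materialized later by
+/// appendCopies), and, if Remove is set, strips the <PredBB> entry from the
+/// PHI.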
+void TailDuplicator::processPHI(
+ MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+ DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
+ SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
+ const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
+
+ // Insert a copy from the source at the end of the block. The def register
+ // is the available value live out of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
+ if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
+ addSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ if (!Remove)
+ return;
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx + 1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicator::duplicateInstruction(
+ MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
+ const DenseSet<unsigned> &UsedByPhi) {
+ MachineInstr *NewMI = TII->duplicate(*MI, MF);
+ if (PreRegAlloc) {
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
+ addSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ auto VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end()) {
+ // Need to make sure that the register class of the mapped register
+ // will satisfy the constraints of the class of the register being
+ // replaced.
+ auto *OrigRC = MRI->getRegClass(Reg);
+ auto *MappedRC = MRI->getRegClass(VI->second.Reg);
+ const TargetRegisterClass *ConstrRC;
+ if (VI->second.SubReg != 0) {
+ ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
+ VI->second.SubReg);
+ if (ConstrRC) {
+ // The actual constraining (as in "find appropriate new class")
+ // is done by getMatchingSuperRegClass, so now we only need to
+ // change the class of the mapped register.
+ MRI->setRegClass(VI->second.Reg, ConstrRC);
+ }
+ } else {
+ // For mapped registers that do not have sub-registers, simply
+ // restrict their class to match the original one.
+ ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
+ }
+
+ if (ConstrRC) {
+ // If the class constraining succeeded, we can simply replace
+ // the old register with the mapped one.
+ MO.setReg(VI->second.Reg);
+ // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
+ // sub-register, we need to compose the sub-register indices.
+ MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
+ VI->second.SubReg));
+ } else {
+ // The direct replacement is not possible, due to failing register
+ // class constraints. An explicit COPY is necessary. Create one
+ // that can be reused.
+ auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
+ if (NewRC == nullptr)
+ NewRC = OrigRC;
+ unsigned NewReg = MRI->createVirtualRegister(NewRC);
+ BuildMI(*PredBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewReg)
+ .addReg(VI->second.Reg, 0, VI->second.SubReg);
+ LocalVRMap.erase(VI);
+ LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
+ MO.setReg(NewReg);
+ // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
+ // is equivalent to the whole register Reg. Hence, Reg:subreg
+ // is same as NewReg:subreg, so keep the sub-register index
+ // unchanged.
+ }
+ // Clear any kill flags from this operand. The new register could
+ // have uses after this one, so kills are not valid here.
+ MO.setIsKill(false);
+ }
+ }
+ }
+ }
+ PredBB->insert(PredBB->instr_end(), NewMI);
+}
+
+/// After FromBB is tail duplicated into its predecessor blocks, the successors
+/// have gained new predecessors. Update the PHI instructions in them
+/// accordingly.
+void TailDuplicator::updateSuccessorsPHIs(
+ MachineBasicBlock *FromBB, bool isDead,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallSetVector<MachineBasicBlock *, 8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end();
+ SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (!II->isPHI())
+ break;
+ MachineInstrBuilder MIB(*FromBB->getParent(), II);
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i + 1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate PHI source entries. FIXME: Should sdisel or an
+ // earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i + 1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i + 1);
+ II->RemoveOperand(i);
+ }
+ }
+ } else
+ Idx = 0;
+
+ // If Idx is set, the operands at Idx and Idx+1 must be removed.
+ // We reuse the location to avoid expensive RemoveOperand calls.
+
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ // If we didn't duplicate a bb into a particular predecessor, we
+ // might still have added an entry to SSAUpdateVals to correctly
+ // recompute SSA. In that case, avoid adding a dummy extra argument
+ // to this PHI.
+ if (!SrcBB->isSuccessor(SuccBB))
+ continue;
+
+ unsigned SrcReg = LI->second[j].second;
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(SrcReg);
+ II->getOperand(Idx + 1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(SrcReg).addMBB(SrcBB);
+ }
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ if (Idx != 0) {
+ II->getOperand(Idx).setReg(Reg);
+ II->getOperand(Idx + 1).setMBB(SrcBB);
+ Idx = 0;
+ } else {
+ MIB.addReg(Reg).addMBB(SrcBB);
+ }
+ }
+ }
+ if (Idx != 0) {
+ II->RemoveOperand(Idx + 1);
+ II->RemoveOperand(Idx);
+ }
+ }
+ }
+}
+
+/// Determine if it is profitable to duplicate this block.
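+/// The instruction budget is TailDuplicateSize (2 by default), dropped to 1
+/// for functions optimized for size unless -tail-dup-size was given
+/// explicitly, and raised to 20 before register allocation for blocks ending
+/// in an indirect branch; see the heuristics below.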
+bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
+ MachineBasicBlock &TailBB) {
+ // Only duplicate blocks that end with unconditional branches.
+ if (TailBB.canFallThrough())
+ return false;
+
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
+ // Set the limit on the cost to duplicate. When optimizing for size,
+ // duplicate only one instruction, because one branch instruction can be
+ // eliminated to compensate for the duplication.
+ unsigned MaxDuplicateCount;
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ // FIXME: Use Function::optForSize().
+ MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ MaxDuplicateCount = 1;
+ else
+ MaxDuplicateCount = TailDuplicateSize;
+
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
+
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned InstrCount = 0;
+ for (MachineInstr &MI : TailBB) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (MI.isNotDuplicable())
+ return false;
+
+ // Convergent instructions can be duplicated only if doing so doesn't add
+ // new control dependencies, which is exactly what tail duplication would do
+ // here, so reject the block.
+ if (MI.isConvergent())
+ return false;
+
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && MI.isReturn())
+ return false;
+
+ // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation, so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && MI.isCall())
+ return false;
+
+ if (!MI.isPHI() && !MI.isDebugValue())
+ InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
+ }
+
+ // Check if any of the successors of TailBB has a PHI node in which the
+ // value corresponding to TailBB uses a subregister.
+ // If a phi node uses a register paired with a subregister, the actual
+ // "value type" of the phi may differ from the type of the register without
+ // any subregisters. Due to a bug, tail duplication may add a new operand
+ // without the necessary subregister, producing invalid code. This is
+ // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
+ // Disable tail duplication for this case for now, until the problem is
+ // fixed.
+ for (auto SB : TailBB.successors()) {
+ for (auto &I : *SB) {
+ if (!I.isPHI())
+ break;
+ unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
+ assert(Idx != 0);
+ MachineOperand &PU = I.getOperand(Idx);
+ if (PU.getSubReg() != 0)
+ return false;
+ }
+ }
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
+
+/// True if this BB has only one unconditional jump.
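+/// That is, the block has exactly one successor, at least one predecessor,
+/// and contains nothing besides debug values and (optionally) a single
+/// unconditional branch.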
+bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
+ return false;
+ MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr();
+ if (I == TailBB->end())
+ return true;
+ return I->isUnconditionalBranch();
+}
+
+static bool bothUsedInPHI(const MachineBasicBlock &A,
+ const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) {
+ for (MachineBasicBlock *BB : A.successors())
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+
+ return false;
+}
+
+bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ for (MachineBasicBlock *PredBB : BB.predecessors()) {
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+
+ if (!PredCond.empty())
+ return false;
+ }
+ return true;
+}
+
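+/// Duplicate a "simple" block (see isSimpleBB) into those predecessors whose
+/// branches can be rewritten, without copying any instructions: each such
+/// predecessor's branch to TailBB is retargeted at TailBB's single successor.
+/// Schematically,
+///   Pred -> Tail -> Succ    becomes    Pred -> Succ
+/// with Tail left in place until it becomes dead.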
+bool TailDuplicator::duplicateSimpleBB(
+ MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVectorImpl<MachineInstr *> &Copies) {
+ SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->hasEHPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+ // Redirect branches targeting TailBB to its single successor.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible.
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = nullptr;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = nullptr;
+ if (PredTBB == NextBB && PredFBB == nullptr)
+ PredTBB = nullptr;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (!PredBB->isSuccessor(NewTarget))
+ PredBB->replaceSuccessor(TailBB, NewTarget);
+ else {
+ PredBB->removeSuccessor(TailBB, true);
+ assert(PredBB->succ_size() <= 1);
+ }
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
+
+/// If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
+ MachineBasicBlock *TailBB,
+ SmallVectorImpl<MachineBasicBlock *> &TDBBs,
+ SmallVectorImpl<MachineInstr *> &Copies) {
+ DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() > 1)
+ continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ continue;
+
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ TDBBs.push_back(PredBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+
+ // Clone the contents of TailBB into PredBB.
+ DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ }
+ }
+ appendCopies(PredBB, CopyInfos, Copies);
+
+ // Simplify the branches of PredBB; analyzeBranch with AllowModify set may
+ // clean them up.
+ TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
+ NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end();
+ I != E; ++I)
+ PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
+
+ Changed = true;
+ ++NumTailDups;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
+ SmallVector<MachineOperand, 4> PriorCond;
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PrevBB->succ_size() == 1 &&
+ !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
+ PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
+ << "From MBB: " << *TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ appendCopies(PrevBB, CopyInfos, Copies);
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ }
+
+ // If this is after register allocation, there are no phis to fix.
+ if (!PreRegAlloc)
+ return Changed;
+
+ // If we made no changes so far, we are safe.
+ if (!Changed)
+ return Changed;
+
+ // Handle the nasty case in which we duplicated a block that is part of a loop
+ // into some but not all of its predecessors. For example:
+ //    1 -> 2 <-> 3                 |
+ //          \                      |
+ //           \---> rest            |
+ // if we duplicate 2 into 1 but not into 3, we end up with
+ //    12 -> 3 <-> 2 -> rest        |
+ //      \             /            |
+ //       \----->-----/             |
+ // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
+ // with a phi in 3 (which now dominates 2).
+ // What we do here is introduce a copy in 3 of the register defined by the
+ // phi, just like when we are duplicating 2 into 3, but we don't copy any
+ // real instructions or remove the 3 -> 2 edge from the phi in 2.
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end();
+ PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ continue;
+
+ // EH edges
+ if (PredBB->succ_size() != 1)
+ continue;
+
+ DenseMap<unsigned, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ }
+ appendCopies(PredBB, CopyInfos, Copies);
+ }
+
+ return Changed;
+}
+
+/// At the end of the block \p MBB generate COPY instructions between registers
+/// described by \p CopyInfos. Append resulting instructions to \p Copies.
+void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
+ SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
+ SmallVectorImpl<MachineInstr*> &Copies) {
+ MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+ const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
+ for (auto &CI : CopyInfos) {
+ auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
+ .addReg(CI.second.Reg, 0, CI.second.SubReg);
+ Copies.push_back(C);
+ }
+}
+
+/// Remove the specified dead machine basic block from the function, updating
+/// the CFG.
+void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
+ assert(MBB->pred_empty() && "MBB must be dead!");
+ DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+ // Remove all successors.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_end() - 1);
+
+ // Remove the block.
+ MBB->eraseFromParent();
+}
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
new file mode 100644
index 000000000000..cac7e63af328
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -0,0 +1,104 @@
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// The default implementation just looks at attribute "no-frame-pointer-elim".
+bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
+ auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim");
+ return Attr.getValueAsString() == "true";
+}
+
+/// Returns the displacement from the frame register to the stack
+/// frame of the specified index, along with the frame register used
+/// (in output arg FrameReg). This is the default implementation which
+/// is overridden for some targets.
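+/// For example (illustrative numbers), an object at offset -8 in a 32-byte
+/// frame with a zero local-area offset and no offset adjustment yields
+/// -8 + 32 - 0 + 0 = 24 bytes from the frame register.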
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+bool TargetFrameLowering::needsFrameIndexResolution(
+ const MachineFunction &MF) const {
+ return MF.getFrameInfo()->hasStackObjects();
+}
+
+void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+ // Resize before the early returns. Some backends expect that
+ // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
+ // saved registers.
+ SavedRegs.resize(TRI.getNumRegs());
+
+ // When interprocedural register allocation is enabled, caller-saved
+ // registers are preferred over callee-saved registers.
+ if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction()))
+ return;
+
+ // Get the callee saved register list...
+ const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+
+ // Early exit if there are no callee saved registers.
+ if (!CSRegs || CSRegs[0] == 0)
+ return;
+
+ // In Naked functions we aren't going to save any registers.
+ if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
+ return;
+
+ // Functions which call __builtin_unwind_init get all their registers saved.
+ bool CallsUnwindInit = MF.getMMI().callsUnwindInit();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (CallsUnwindInit || MRI.isPhysRegModified(Reg))
+ SavedRegs.set(Reg);
+ }
+}
+
+unsigned TargetFrameLowering::getStackAlignmentSkew(
+ const MachineFunction &MF) const {
+ // When an HHVM function is called, the stack is skewed as the return address
+ // is removed from the stack before we enter the function.
+ if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM))
+ return MF.getTarget().getPointerSize();
+
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
new file mode 100644
index 000000000000..e7330c60ed23
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -0,0 +1,1195 @@
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ if (OpNum >= MCID.getNumOperands())
+ return nullptr;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+ return TRI->getPointerRegClass(MF, RegClass);
+
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return nullptr;
+
+ // Otherwise just look it up normally.
+ return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
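+/// For example, assuming the target's separator string is "\n" and its
+/// comment string is "#", the string "nop\n# spill\nnop" counts as two
+/// instructions and is reported as 2 * MAI.getMaxInstLength() bytes.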
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned InstCount = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0) {
+ atInsnStart = true;
+ } else if (strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0) {
+ // Stop counting as an instruction after a comment until the next
+ // separator.
+ atInsnStart = false;
+ }
+
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ ++InstCount;
+ atInsnStart = false;
+ }
+ }
+
+ return InstCount * MAI.getMaxInstLength();
+}
+
+/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Save off the debug loc before erasing the instruction.
+ DebugLoc DL = Tail->getDebugLoc();
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If MBB isn't immediately before NewDest, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
+ MBB->addSuccessor(NewDest);
+}
+
+MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI, unsigned Idx1,
+ unsigned Idx2) const {
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool HasDef = MCID.getNumDefs();
+ if (HasDef && !MI.getOperand(0).isReg())
+ // No idea how to commute this instruction. Target should implement its own.
+ return nullptr;
+
+ unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
+ unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
+ assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
+ CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
+ "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
+ assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
+ "This only knows how to commute register operands so far");
+
+ unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI.getOperand(Idx1).getReg();
+ unsigned Reg2 = MI.getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
+ bool Reg1IsKill = MI.getOperand(Idx1).isKill();
+ bool Reg2IsKill = MI.getOperand(Idx2).isKill();
+ bool Reg1IsUndef = MI.getOperand(Idx1).isUndef();
+ bool Reg2IsUndef = MI.getOperand(Idx2).isUndef();
+ bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead();
+ bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
+ // If the destination is tied to either of the commuted source registers,
+ // then it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ Reg2IsKill = false;
+ Reg0 = Reg2;
+ SubReg0 = SubReg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
+ SubReg0 = SubReg1;
+ }
+
+ MachineInstr *CommutedMI = nullptr;
+ if (NewMI) {
+ // Create a new instruction.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ CommutedMI = MF.CloneMachineInstr(&MI);
+ } else {
+ CommutedMI = &MI;
+ }
+
+ if (HasDef) {
+ CommutedMI->getOperand(0).setReg(Reg0);
+ CommutedMI->getOperand(0).setSubReg(SubReg0);
+ }
+ CommutedMI->getOperand(Idx2).setReg(Reg1);
+ CommutedMI->getOperand(Idx1).setReg(Reg2);
+ CommutedMI->getOperand(Idx2).setSubReg(SubReg1);
+ CommutedMI->getOperand(Idx1).setSubReg(SubReg2);
+ CommutedMI->getOperand(Idx2).setIsKill(Reg1IsKill);
+ CommutedMI->getOperand(Idx1).setIsKill(Reg2IsKill);
+ CommutedMI->getOperand(Idx2).setIsUndef(Reg1IsUndef);
+ CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef);
+ CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal);
+ CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
+ return CommutedMI;
+}
+
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose
+ // any commutable operand, which is done in findCommutedOpIndices() method
+ // called below.
+ if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) &&
+ !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
+ assert(MI.isCommutable() &&
+ "Precondition violation: MI must be commutable.");
+ return nullptr;
+ }
+ return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+}
+
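+/// Reconcile the caller-requested operand indices (either of which may be
+/// CommuteAnyOperandIndex, meaning "pick one for me") with the operands that
+/// are actually commutable. For example, a request of
+/// (CommuteAnyOperandIndex, 2) against commutable operands (1, 2) fixes
+/// ResultIdx1 to 1; a request of (1, 3) against the same pair fails.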
+bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
+ unsigned &ResultIdx2,
+ unsigned CommutableOpIdx1,
+ unsigned CommutableOpIdx2) {
+ if (ResultIdx1 == CommuteAnyOperandIndex &&
+ ResultIdx2 == CommuteAnyOperandIndex) {
+ ResultIdx1 = CommutableOpIdx1;
+ ResultIdx2 = CommutableOpIdx2;
+ } else if (ResultIdx1 == CommuteAnyOperandIndex) {
+ if (ResultIdx2 == CommutableOpIdx1)
+ ResultIdx1 = CommutableOpIdx2;
+ else if (ResultIdx2 == CommutableOpIdx2)
+ ResultIdx1 = CommutableOpIdx1;
+ else
+ return false;
+ } else if (ResultIdx2 == CommuteAnyOperandIndex) {
+ if (ResultIdx1 == CommutableOpIdx1)
+ ResultIdx2 = CommutableOpIdx2;
+ else if (ResultIdx1 == CommutableOpIdx2)
+ ResultIdx2 = CommutableOpIdx1;
+ else
+ return false;
+ } else
+ // Check that the result operand indices match the given commutable
+ // operand indices.
+ return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) ||
+ (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1);
+
+ return true;
+}
+
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI.isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+ // is not true, then the target must implement this.
+ unsigned CommutableOpIdx1 = MCID.getNumDefs();
+ unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+
+ if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+}
+
+bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
+ if (!MI.isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI.isBranch() && !MI.isBarrier())
+ return true;
+ if (!MI.isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+bool TargetInstrInfo::PredicateInstruction(
+ MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
+ bool MadeChange = false;
+
+ assert(!MI.isBundle() &&
+ "TargetInstrInfo::PredicateInstruction() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MI.isPredicable())
+ return false;
+
+ for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate()) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ MO.setReg(Pred[j].getReg());
+ MadeChange = true;
+ } else if (MO.isImm()) {
+ MO.setImm(Pred[j].getImm());
+ MadeChange = true;
+ } else if (MO.isMBB()) {
+ MO.setMBB(Pred[j].getMBB());
+ MadeChange = true;
+ }
+ ++j;
+ }
+ }
+ return MadeChange;
+}
+
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
+ oe = MI.memoperands_end();
+ o != oe; ++o) {
+ if ((*o)->isLoad()) {
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ (*o)->getPseudoValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI.memoperands_begin(),
+ oe = MI.memoperands_end();
+ o != oe; ++o) {
+ if ((*o)->isStore()) {
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ (*o)->getPseudoValue())) {
+ FrameIndex = Value->getFrameIndex();
+ MMO = *o;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
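+/// Compute the byte size and byte offset within a spill slot of class RC that
+/// a SubIdx sub-register occupies. For example (illustrative values), a
+/// 32-bit sub-register at bit offset 0 of an 8-byte register class yields
+/// Size = 4 and Offset = 0 on a little-endian target, and Offset = 8 - 4 = 4
+/// on a big-endian one; the function returns false when the sub-register is
+/// not a whole number of bytes in size or offset.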
+bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
+ unsigned SubIdx, unsigned &Size,
+ unsigned &Offset,
+ const MachineFunction &MF) const {
+ if (!SubIdx) {
+ Size = RC->getSize();
+ Offset = 0;
+ return true;
+ }
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
+ // Convert bit size to byte size to be consistent with
+ // MCRegisterClass::getSize().
+ if (BitSize % 8)
+ return false;
+
+ int BitOffset = TRI->getSubRegIdxOffset(SubIdx);
+ if (BitOffset < 0 || BitOffset % 8)
+ return false;
+
+ Size = BitSize /= 8;
+ Offset = (unsigned)BitOffset / 8;
+
+ assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
+
+ if (!MF.getDataLayout().isLittleEndian()) {
+ Offset = RC->getSize() - (Offset + Size);
+ }
+ return true;
+}
+
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr &Orig,
+ const TargetRegisterInfo &TRI) const {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
+ MBB.insert(I, MI);
+}
+
+bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
+ const MachineInstr &MI1,
+ const MachineRegisterInfo *MRI) const {
+ return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr &Orig,
+ MachineFunction &MF) const {
+ assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
+ return MF.CloneMachineInstr(&Orig);
+}
+
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
+ unsigned FoldIdx) {
+ assert(MI.isCopy() && "MI must be a COPY instruction");
+ if (MI.getNumOperands() != 2)
+ return nullptr;
+ assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
+
+ const MachineOperand &FoldOp = MI.getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return nullptr;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : nullptr;
+
+ if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return nullptr;
+}
+
+void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ llvm_unreachable("Not a MachO target");
+}
+
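+/// Fold a frame index into a STACKMAP or PATCHPOINT. Only operands past the
+/// fixed header (ID and shadow bytes for STACKMAP, everything up to
+/// getVarIdx() for PATCHPOINT) may be folded; each folded register operand is
+/// rewritten as the tuple (IndirectMemRefOp, spill size, frame index, offset).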
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
+ const TargetInstrInfo &TII) {
+ unsigned StartIdx = 0;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::STACKMAP:
+ StartIdx = 2; // Skip ID, nShadowBytes.
+ break;
+ case TargetOpcode::PATCHPOINT: {
+ // For PatchPoint, the call args are not foldable.
+ PatchPointOpers opers(&MI);
+ StartIdx = opers.getVarIdx();
+ break;
+ }
+ default:
+ llvm_unreachable("unexpected stackmap opcode");
+ }
+
+ // Return nullptr if any operands requested for folding are not foldable (not
+ // part of the stackmap's live values).
+ for (unsigned Op : Ops) {
+ if (Op < StartIdx)
+ return nullptr;
+ }
+
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+ // No need to fold the return value, the metadata, or the function arguments.
+ for (unsigned i = 0; i < StartIdx; ++i)
+ MIB.addOperand(MI.getOperand(i));
+
+ for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
+ unsigned SpillSize;
+ unsigned SpillOffset;
+ // Compute the spill slot size and offset.
+ const TargetRegisterClass *RC =
+ MF.getRegInfo().getRegClass(MO.getReg());
+ bool Valid =
+ TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
+ if (!Valid)
+ report_fatal_error("cannot spill patchpoint subregister operand");
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(SpillSize);
+ MIB.addFrameIndex(FrameIndex);
+ MIB.addImm(SpillOffset);
+ }
+ else
+ MIB.addOperand(MO);
+ }
+ return NewMI;
+}
+
+/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+/// slot into the specified machine instruction for the specified operand(s).
+/// If this is possible, a new instruction is returned with the specified
+/// operand folded, otherwise NULL is returned. The client is responsible for
+/// removing the old instruction and adding the new one in the instruction
+/// stream.
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops, int FI,
+ LiveIntervals *LIS) const {
+ auto Flags = MachineMemOperand::MONone;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ if (MI.getOperand(Ops[i]).isDef())
+ Flags |= MachineMemOperand::MOStore;
+ else
+ Flags |= MachineMemOperand::MOLoad;
+
+ MachineBasicBlock *MBB = MI.getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
+ MachineInstr *NewMI = nullptr;
+
+ if (MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ // Fold stackmap/patchpoint.
+ NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
+ if (NewMI)
+ MBB->insert(MI, NewMI);
+ } else {
+ // Ask the target to do the actual folding.
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
+ }
+
+ if (NewMI) {
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ // Add a memory operand; foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
+
+ return NewMI;
+ }
+
+ // Straight COPY may fold as load/store.
+ if (!MI.isCopy() || Ops.size() != 1)
+ return nullptr;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return nullptr;
+
+ const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return &*--Pos;
+}
+
+bool TargetInstrInfo::hasReassociableOperands(
+ const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
+ const MachineOperand &Op1 = Inst.getOperand(1);
+ const MachineOperand &Op2 = Inst.getOperand(2);
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ // We need virtual register definitions for the operands that we will
+ // reassociate.
+ MachineInstr *MI1 = nullptr;
+ MachineInstr *MI2 = nullptr;
+ if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ MI1 = MRI.getUniqueVRegDef(Op1.getReg());
+ if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ MI2 = MRI.getUniqueVRegDef(Op2.getReg());
+
+ // And they need to be in the trace (otherwise, they won't have a depth).
+ return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB;
+}
+
+bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
+ bool &Commuted) const {
+ const MachineBasicBlock *MBB = Inst.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
+ MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
+ unsigned AssocOpcode = Inst.getOpcode();
+
+ // If only one operand has the same opcode and it's the second source operand,
+ // the operands must be commuted.
+ Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ if (Commuted)
+ std::swap(MI1, MI2);
+
+ // 1. The previous instruction must be the same type as Inst.
+ // 2. The previous instruction must have virtual register definitions for its
+ // operands in the same basic block as Inst.
+ // 3. The previous instruction's result must only be used by Inst.
+ return MI1->getOpcode() == AssocOpcode &&
+ hasReassociableOperands(*MI1, MBB) &&
+ MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
+}
+
+// 1. The operation must be associative and commutative.
+// 2. The instruction must have virtual register definitions for its
+// operands in the same basic block.
+// 3. The instruction must have a reassociable sibling.
+bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
+ bool &Commuted) const {
+ return isAssociativeAndCommutative(Inst) &&
+ hasReassociableOperands(Inst, Inst.getParent()) &&
+ hasReassociableSibling(Inst, Commuted);
+}
+
+// The concept of the reassociation pass is that these operations can benefit
+// from this kind of transformation:
+//
+// A = ? op ?
+// B = A op X (Prev)
+// C = B op Y (Root)
+// -->
+// A = ? op ?
+// B = X op Y
+// C = A op B
+//
+// breaking the dependency between A and B, allowing them to be executed in
+// parallel (or back-to-back in a pipeline) instead of depending on each other.
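+//
+// As a purely illustrative instance with integer adds:
+//
+// t1 = a + b
+// t2 = t1 + c (Prev)
+// t3 = t2 + d (Root)
+// -->
+// t1 = a + b
+// t4 = c + d
+// t3 = t1 + t4
+//
+// so the last two adds no longer form a serial dependency chain.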
+
+// FIXME: This has the potential to be expensive (compile time) while not
+// improving the code at all. Some ways to limit the overhead:
+// 1. Track successful transforms; bail out if hit rate gets too low.
+// 2. Only enable at -O3 or some other non-default optimization level.
+// 3. Pre-screen pattern candidates here: if an operand of the previous
+// instruction is known to not increase the critical path, then don't match
+// that pattern.
+bool TargetInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ bool Commute;
+ if (isReassociationCandidate(Root, Commute)) {
+ // We found a sequence of instructions that may be suitable for a
+ // reassociation of operands to increase ILP. Specify each commutation
+ // possibility for the Prev instruction in the sequence and let the
+ // machine combiner decide if changing the operands is worthwhile.
+ if (Commute) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB);
+ } else {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY);
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+/// Return true when a code sequence can improve loop throughput.
+bool
+TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+ return false;
+}
+
+/// Attempt the reassociation transformation to reduce critical path length.
+/// See the above comments before getMachineCombinerPatterns().
+void TargetInstrInfo::reassociateOps(
+ MachineInstr &Root, MachineInstr &Prev,
+ MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
+
+ // This array encodes the operand index for each parameter because the
+ // operands may be commuted. Each row corresponds to a pattern value,
+ // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = {
+ { 1, 1, 2, 2 },
+ { 1, 2, 2, 1 },
+ { 2, 1, 1, 2 },
+ { 2, 2, 1, 1 }
+ };
+
+ int Row;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break;
+ case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break;
+ case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break;
+ case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break;
+ default: llvm_unreachable("unexpected MachineCombinerPattern");
+ }
+
+ MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]);
+ MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]);
+ MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]);
+ MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
+ MachineOperand &OpC = Root.getOperand(0);
+
+ unsigned RegA = OpA.getReg();
+ unsigned RegB = OpB.getReg();
+ unsigned RegX = OpX.getReg();
+ unsigned RegY = OpY.getReg();
+ unsigned RegC = OpC.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA))
+ MRI.constrainRegClass(RegA, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI.constrainRegClass(RegB, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegX))
+ MRI.constrainRegClass(RegX, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegY))
+ MRI.constrainRegClass(RegY, RC);
+ if (TargetRegisterInfo::isVirtualRegister(RegC))
+ MRI.constrainRegClass(RegC, RC);
+
+ // Create a new virtual register for the result of (X op Y) instead of
+ // recycling RegB because the MachineCombiner's computation of the critical
+ // path requires a new register definition rather than an existing one.
+ unsigned NewVR = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+
+ unsigned Opcode = Root.getOpcode();
+ bool KillA = OpA.isKill();
+ bool KillX = OpX.isKill();
+ bool KillY = OpY.isKill();
+
+ // Create new instructions for insertion.
+ MachineInstrBuilder MIB1 =
+ BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegY, getKillRegState(KillY));
+ MachineInstrBuilder MIB2 =
+ BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
+ .addReg(RegA, getKillRegState(KillA))
+ .addReg(NewVR, getKillRegState(true));
+
+ setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
+
+ // Record new instructions for insertion and old instructions for deletion.
+ InsInstrs.push_back(MIB1);
+ InsInstrs.push_back(MIB2);
+ DelInstrs.push_back(&Prev);
+ DelInstrs.push_back(&Root);
+}
+
+void TargetInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
+
+ // Select the previous instruction in the sequence based on the input pattern.
+ MachineInstr *Prev = nullptr;
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
+ break;
+ default:
+ break;
+ }
+
+ assert(Prev && "Unknown pattern for machine combiner");
+
+ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
+}
+
+/// foldMemoryOperand - Same as the previous version except it allows folding
+/// of any load and store from / to any address, not just from a specific
+/// stack slot.
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr &LoadMI,
+ LiveIntervals *LIS) const {
+ assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!");
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!");
+#endif
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ // Ask the target to do the actual folding.
+ MachineInstr *NewMI = nullptr;
+ int FrameIndex = 0;
+
+ if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT) &&
+ isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ // Fold stackmap/patchpoint.
+ NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+ if (NewMI)
+ NewMI = &*MBB.insert(MI, NewMI);
+ } else {
+ // Ask the target to do the actual folding.
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
+ }
+
+ if (!NewMI) return nullptr;
+
+ // Copy the memoperands from the load to the folded instruction.
+ if (MI.memoperands_empty()) {
+ NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end());
+ } else {
+ // Handle the rare case of folding multiple loads.
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(),
+ E = LoadMI.memoperands_end();
+ I != E; ++I) {
+ NewMI->addMemOperand(MF, *I);
+ }
+ }
+ return NewMI;
+}
+
+bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
+ const MachineInstr &MI, AliasAnalysis *AA) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Remat clients assume operand 0 is the defined register.
+ if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
+ return false;
+ unsigned DefReg = MI.getOperand(0).getReg();
+
+ // A sub-register definition can only be rematerialized if the instruction
+ // doesn't read the other parts of the register. Otherwise it is really a
+ // read-modify-write operation on the full virtual register which cannot be
+ // moved safely.
+ if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
+ MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg))
+ return false;
+
+ // A load from a fixed stack slot can be rematerialized. This may be
+ // redundant with subsequent checks, but it's target-independent,
+ // simple, and a common case.
+ int FrameIdx = 0;
+ if (isLoadFromStackSlot(MI, FrameIdx) &&
+ MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ return true;
+
+ // Avoid instructions obviously unsafe for remat.
+ if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI.isInlineAsm())
+ return false;
+
+ // Avoid instructions which load from potentially varying memory.
+ if (MI.mayLoad() && !MI.isInvariantLoad(AA))
+ return false;
+
+ // If any of the registers accessed are non-constant, conservatively assume
+ // the instruction is not rematerializable.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Check for a well-behaved physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ if (!MRI.isConstantPhysReg(Reg, MF))
+ return false;
+ } else {
+ // A physreg def. We can't remat it.
+ return false;
+ }
+ continue;
+ }
+
+ // Only allow one virtual-register def. There may be multiple defs of the
+ // same virtual register, though.
+ if (MO.isDef() && Reg != DefReg)
+ return false;
+
+ // Don't allow any virtual-register uses. Rematting an instruction with
+ // virtual register uses would lengthen the live ranges of the uses, which
+ // is not necessarily a good idea, certainly not "trivial".
+ if (MO.isUse())
+ return false;
+ }
+
+ // Everything checked out.
+ return true;
+}
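+
+// To illustrate the checks above: a load from an immutable stack slot or an
+// instruction with no register uses and a single virtual-register def (an
+// immediate move, for instance) is accepted, while an instruction that reads
+// another virtual register is rejected because rematerializing it would
+// extend that register's live range.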
+
+int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ bool StackGrowsDown =
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
+
+ if (MI.getOpcode() != FrameSetupOpcode &&
+ MI.getOpcode() != FrameDestroyOpcode)
+ return 0;
+
+ int SPAdj = MI.getOperand(0).getImm();
+ SPAdj = TFI->alignSPAdjust(SPAdj);
+
+ if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
+ (StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
+ SPAdj = -SPAdj;
+
+ return SPAdj;
+}
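+
+// For example, on a target whose stack grows down, a call frame setup with an
+// immediate of 16 yields an adjustment of +16 and the matching frame destroy
+// yields -16 (ignoring any rounding applied by alignSPAdjust); the signs are
+// flipped when the stack grows up.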
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Terminators and labels can't be scheduled around.
+ if (MI.isTerminator() || MI.isPosition())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI);
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr &MI) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ unsigned Class = MI.getDesc().getSchedClass();
+ int UOps = ItinData->Itineraries[Class].NumMicroOps;
+ if (UOps >= 0)
+ return UOps;
+
+ // The # of u-ops is dynamically determined. The specific target should
+ // override this function to return the right number.
+ return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
+ const MachineInstr &DefMI) const {
+ if (DefMI.isTransient())
+ return 0;
+ if (DefMI.mayLoad())
+ return SchedModel.LoadLatency;
+ if (isHighLatencyDef(DefMI.getOpcode()))
+ return SchedModel.HighLatency;
+ return 1;
+}
+
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr &) const {
+ return 0;
+}
+
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return MI.mayLoad() ? 2 : 1;
+
+ return ItinData->getStageLatency(MI.getDesc().getSchedClass());
+}
+
+bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
+ const MachineInstr &DefMI,
+ unsigned DefIdx) const {
+ const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
+ if (!ItinData || ItinData->isEmpty())
+ return false;
+
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+ return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const {
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ unsigned UseClass = UseMI.getDesc().getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+int TargetInstrInfo::computeDefOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI) const {
+
+ // Let the target hook getInstrLatency handle missing itineraries.
+ if (!ItinData)
+ return getInstrLatency(ItinData, DefMI);
+
+ if (ItinData->isEmpty())
+ return defaultDefLatency(ItinData->SchedModel, DefMI);
+
+ // Otherwise, an operand latency lookup is required.
+ return -1;
+}
+
+unsigned TargetInstrInfo::computeOperandLatency(
+ const InstrItineraryData *ItinData, const MachineInstr &DefMI,
+ unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const {
+
+ int DefLatency = computeDefOperandLatency(ItinData, DefMI);
+ if (DefLatency >= 0)
+ return DefLatency;
+
+ assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+ int OperLatency = 0;
+ if (UseMI)
+ OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx);
+ else {
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ InstrLatency = std::max(InstrLatency,
+ defaultDefLatency(ItinData->SchedModel, DefMI));
+ return InstrLatency;
+}
+
+bool TargetInstrInfo::getRegSequenceInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
+ assert((MI.isRegSequence() ||
+ MI.isRegSequenceLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isRegSequence())
+ return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ assert(DefIdx == 0 && "REG_SEQUENCE only has one def");
+ for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
+ OpIdx += 2) {
+ const MachineOperand &MOReg = MI.getOperand(OpIdx);
+ const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ // Record Reg:SubReg, SubIdx.
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(),
+ (unsigned)MOSubIdx.getImm()));
+ }
+ return true;
+}
+
+bool TargetInstrInfo::getExtractSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPairAndIdx &InputReg) const {
+ assert((MI.isExtractSubreg() ||
+ MI.isExtractSubregLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isExtractSubreg())
+ return getExtractSubregLikeInputs(MI, DefIdx, InputReg);
+
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0.sub1, sub0.
+ assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def");
+ const MachineOperand &MOReg = MI.getOperand(1);
+ const MachineOperand &MOSubIdx = MI.getOperand(2);
+ assert(MOSubIdx.isImm() &&
+ "The subindex of the extract_subreg is not an immediate");
+
+ InputReg.Reg = MOReg.getReg();
+ InputReg.SubReg = MOReg.getSubReg();
+ InputReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
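+
+// For example, given "Def = EXTRACT_SUBREG v0.sub1, sub0" as in the comment
+// above, InputReg is filled in as (v0, sub1, sub0): the source register, its
+// sub-register index, and the extracted sub-register index.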
+
+bool TargetInstrInfo::getInsertSubregInputs(
+ const MachineInstr &MI, unsigned DefIdx,
+ RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const {
+ assert((MI.isInsertSubreg() ||
+ MI.isInsertSubregLike()) && "Instruction does not have the proper type");
+
+ if (!MI.isInsertSubreg())
+ return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub0.
+ assert(DefIdx == 0 && "INSERT_SUBREG only has one def");
+ const MachineOperand &MOBaseReg = MI.getOperand(1);
+ const MachineOperand &MOInsertedReg = MI.getOperand(2);
+ const MachineOperand &MOSubIdx = MI.getOperand(3);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ BaseReg.Reg = MOBaseReg.getReg();
+ BaseReg.SubReg = MOBaseReg.getSubReg();
+
+ InsertedReg.Reg = MOInsertedReg.getReg();
+ InsertedReg.SubReg = MOInsertedReg.getSubReg();
+ InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
new file mode 100644
index 000000000000..6d3fe8ca6473
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -0,0 +1,1842 @@
+//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLoweringBase class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cctype>
+using namespace llvm;
+
+static cl::opt<bool> JumpIsExpensiveOverride(
+ "jump-is-expensive", cl::init(false),
+ cl::desc("Do not create extra branches to split comparison logic."),
+ cl::Hidden);
+
+// Although this default value is arbitrary, it is not random. It is assumed
+// that a condition that evaluates the same way more often than this percentage
+// is best represented as control flow. Therefore, the default value N should be
+// set such that the win from N% correctly predicted executions is greater than
+// the loss from (100 - N)% mispredicted executions for the majority of intended
+// targets.
+static cl::opt<int> MinPercentageForPredictableBranch(
+ "min-predictable-branch", cl::init(99),
+ cl::desc("Minimum percentage (0-100) that a condition must be either true "
+ "or false to assume that the condition is predictable"),
+ cl::Hidden);
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names, const Triple &TT) {
+ Names[RTLIB::SHL_I16] = "__ashlhi3";
+ Names[RTLIB::SHL_I32] = "__ashlsi3";
+ Names[RTLIB::SHL_I64] = "__ashldi3";
+ Names[RTLIB::SHL_I128] = "__ashlti3";
+ Names[RTLIB::SRL_I16] = "__lshrhi3";
+ Names[RTLIB::SRL_I32] = "__lshrsi3";
+ Names[RTLIB::SRL_I64] = "__lshrdi3";
+ Names[RTLIB::SRL_I128] = "__lshrti3";
+ Names[RTLIB::SRA_I16] = "__ashrhi3";
+ Names[RTLIB::SRA_I32] = "__ashrsi3";
+ Names[RTLIB::SRA_I64] = "__ashrdi3";
+ Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
+ Names[RTLIB::MUL_I16] = "__mulhi3";
+ Names[RTLIB::MUL_I32] = "__mulsi3";
+ Names[RTLIB::MUL_I64] = "__muldi3";
+ Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
+ Names[RTLIB::SDIV_I16] = "__divhi3";
+ Names[RTLIB::SDIV_I32] = "__divsi3";
+ Names[RTLIB::SDIV_I64] = "__divdi3";
+ Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
+ Names[RTLIB::UDIV_I16] = "__udivhi3";
+ Names[RTLIB::UDIV_I32] = "__udivsi3";
+ Names[RTLIB::UDIV_I64] = "__udivdi3";
+ Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
+ Names[RTLIB::SREM_I16] = "__modhi3";
+ Names[RTLIB::SREM_I32] = "__modsi3";
+ Names[RTLIB::SREM_I64] = "__moddi3";
+ Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
+ Names[RTLIB::UREM_I16] = "__umodhi3";
+ Names[RTLIB::UREM_I32] = "__umodsi3";
+ Names[RTLIB::UREM_I64] = "__umoddi3";
+ Names[RTLIB::UREM_I128] = "__umodti3";
+
+ Names[RTLIB::NEG_I32] = "__negsi2";
+ Names[RTLIB::NEG_I64] = "__negdi2";
+ Names[RTLIB::ADD_F32] = "__addsf3";
+ Names[RTLIB::ADD_F64] = "__adddf3";
+ Names[RTLIB::ADD_F80] = "__addxf3";
+ Names[RTLIB::ADD_F128] = "__addtf3";
+ Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+ Names[RTLIB::SUB_F32] = "__subsf3";
+ Names[RTLIB::SUB_F64] = "__subdf3";
+ Names[RTLIB::SUB_F80] = "__subxf3";
+ Names[RTLIB::SUB_F128] = "__subtf3";
+ Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+ Names[RTLIB::MUL_F32] = "__mulsf3";
+ Names[RTLIB::MUL_F64] = "__muldf3";
+ Names[RTLIB::MUL_F80] = "__mulxf3";
+ Names[RTLIB::MUL_F128] = "__multf3";
+ Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+ Names[RTLIB::DIV_F32] = "__divsf3";
+ Names[RTLIB::DIV_F64] = "__divdf3";
+ Names[RTLIB::DIV_F80] = "__divxf3";
+ Names[RTLIB::DIV_F128] = "__divtf3";
+ Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv";
+ Names[RTLIB::REM_F32] = "fmodf";
+ Names[RTLIB::REM_F64] = "fmod";
+ Names[RTLIB::REM_F80] = "fmodl";
+ Names[RTLIB::REM_F128] = "fmodl";
+ Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_F128] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
+ Names[RTLIB::POWI_F32] = "__powisf2";
+ Names[RTLIB::POWI_F64] = "__powidf2";
+ Names[RTLIB::POWI_F80] = "__powixf2";
+ Names[RTLIB::POWI_F128] = "__powitf2";
+ Names[RTLIB::POWI_PPCF128] = "__powitf2";
+ Names[RTLIB::SQRT_F32] = "sqrtf";
+ Names[RTLIB::SQRT_F64] = "sqrt";
+ Names[RTLIB::SQRT_F80] = "sqrtl";
+ Names[RTLIB::SQRT_F128] = "sqrtl";
+ Names[RTLIB::SQRT_PPCF128] = "sqrtl";
+ Names[RTLIB::LOG_F32] = "logf";
+ Names[RTLIB::LOG_F64] = "log";
+ Names[RTLIB::LOG_F80] = "logl";
+ Names[RTLIB::LOG_F128] = "logl";
+ Names[RTLIB::LOG_PPCF128] = "logl";
+ Names[RTLIB::LOG2_F32] = "log2f";
+ Names[RTLIB::LOG2_F64] = "log2";
+ Names[RTLIB::LOG2_F80] = "log2l";
+ Names[RTLIB::LOG2_F128] = "log2l";
+ Names[RTLIB::LOG2_PPCF128] = "log2l";
+ Names[RTLIB::LOG10_F32] = "log10f";
+ Names[RTLIB::LOG10_F64] = "log10";
+ Names[RTLIB::LOG10_F80] = "log10l";
+ Names[RTLIB::LOG10_F128] = "log10l";
+ Names[RTLIB::LOG10_PPCF128] = "log10l";
+ Names[RTLIB::EXP_F32] = "expf";
+ Names[RTLIB::EXP_F64] = "exp";
+ Names[RTLIB::EXP_F80] = "expl";
+ Names[RTLIB::EXP_F128] = "expl";
+ Names[RTLIB::EXP_PPCF128] = "expl";
+ Names[RTLIB::EXP2_F32] = "exp2f";
+ Names[RTLIB::EXP2_F64] = "exp2";
+ Names[RTLIB::EXP2_F80] = "exp2l";
+ Names[RTLIB::EXP2_F128] = "exp2l";
+ Names[RTLIB::EXP2_PPCF128] = "exp2l";
+ Names[RTLIB::SIN_F32] = "sinf";
+ Names[RTLIB::SIN_F64] = "sin";
+ Names[RTLIB::SIN_F80] = "sinl";
+ Names[RTLIB::SIN_F128] = "sinl";
+ Names[RTLIB::SIN_PPCF128] = "sinl";
+ Names[RTLIB::COS_F32] = "cosf";
+ Names[RTLIB::COS_F64] = "cos";
+ Names[RTLIB::COS_F80] = "cosl";
+ Names[RTLIB::COS_F128] = "cosl";
+ Names[RTLIB::COS_PPCF128] = "cosl";
+ Names[RTLIB::POW_F32] = "powf";
+ Names[RTLIB::POW_F64] = "pow";
+ Names[RTLIB::POW_F80] = "powl";
+ Names[RTLIB::POW_F128] = "powl";
+ Names[RTLIB::POW_PPCF128] = "powl";
+ Names[RTLIB::CEIL_F32] = "ceilf";
+ Names[RTLIB::CEIL_F64] = "ceil";
+ Names[RTLIB::CEIL_F80] = "ceill";
+ Names[RTLIB::CEIL_F128] = "ceill";
+ Names[RTLIB::CEIL_PPCF128] = "ceill";
+ Names[RTLIB::TRUNC_F32] = "truncf";
+ Names[RTLIB::TRUNC_F64] = "trunc";
+ Names[RTLIB::TRUNC_F80] = "truncl";
+ Names[RTLIB::TRUNC_F128] = "truncl";
+ Names[RTLIB::TRUNC_PPCF128] = "truncl";
+ Names[RTLIB::RINT_F32] = "rintf";
+ Names[RTLIB::RINT_F64] = "rint";
+ Names[RTLIB::RINT_F80] = "rintl";
+ Names[RTLIB::RINT_F128] = "rintl";
+ Names[RTLIB::RINT_PPCF128] = "rintl";
+ Names[RTLIB::NEARBYINT_F32] = "nearbyintf";
+ Names[RTLIB::NEARBYINT_F64] = "nearbyint";
+ Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
+ Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+ Names[RTLIB::ROUND_F32] = "roundf";
+ Names[RTLIB::ROUND_F64] = "round";
+ Names[RTLIB::ROUND_F80] = "roundl";
+ Names[RTLIB::ROUND_F128] = "roundl";
+ Names[RTLIB::ROUND_PPCF128] = "roundl";
+ Names[RTLIB::FLOOR_F32] = "floorf";
+ Names[RTLIB::FLOOR_F64] = "floor";
+ Names[RTLIB::FLOOR_F80] = "floorl";
+ Names[RTLIB::FLOOR_F128] = "floorl";
+ Names[RTLIB::FLOOR_PPCF128] = "floorl";
+ Names[RTLIB::FMIN_F32] = "fminf";
+ Names[RTLIB::FMIN_F64] = "fmin";
+ Names[RTLIB::FMIN_F80] = "fminl";
+ Names[RTLIB::FMIN_F128] = "fminl";
+ Names[RTLIB::FMIN_PPCF128] = "fminl";
+ Names[RTLIB::FMAX_F32] = "fmaxf";
+ Names[RTLIB::FMAX_F64] = "fmax";
+ Names[RTLIB::FMAX_F80] = "fmaxl";
+ Names[RTLIB::FMAX_F128] = "fmaxl";
+ Names[RTLIB::FMAX_PPCF128] = "fmaxl";
+ Names[RTLIB::COPYSIGN_F32] = "copysignf";
+ Names[RTLIB::COPYSIGN_F64] = "copysign";
+ Names[RTLIB::COPYSIGN_F80] = "copysignl";
+ Names[RTLIB::COPYSIGN_F128] = "copysignl";
+ Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
+ Names[RTLIB::FPEXT_F32_PPCF128] = "__gcc_stoq";
+ Names[RTLIB::FPEXT_F64_PPCF128] = "__gcc_dtoq";
+ Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2";
+ Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2";
+ Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+ if (TT.isOSDarwin()) {
+ // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
+ // of the gnueabi-style __gnu_*_ieee.
+ // FIXME: What about other targets?
+ Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2";
+ Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2";
+ } else {
+ Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+ Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
+ }
+ Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2";
+ Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2";
+ Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2";
+ Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2";
+ Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
+ Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
+ Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2";
+ Names[RTLIB::FPROUND_PPCF128_F32] = "__gcc_qtos";
+ Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
+ Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPROUND_PPCF128_F64] = "__gcc_qtod";
+ Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
+ Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
+ Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
+ Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
+ Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
+ Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi";
+ Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi";
+ Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti";
+ Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi";
+ Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOSINT_PPCF128_I32] = "__gcc_qtou";
+ Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
+ Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+ Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+ Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+ Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+ Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+ Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi";
+ Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi";
+ Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti";
+ Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
+ Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi";
+ Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi";
+ Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti";
+ Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+ Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+ Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf";
+ Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
+ Names[RTLIB::SINTTOFP_I32_PPCF128] = "__gcc_itoq";
+ Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+ Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+ Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf";
+ Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf";
+ Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf";
+ Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf";
+ Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf";
+ Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
+ Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf";
+ Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+ Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+ Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf";
+ Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
+ Names[RTLIB::UINTTOFP_I32_PPCF128] = "__gcc_utoq";
+ Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+ Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+ Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf";
+ Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf";
+ Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf";
+ Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf";
+ Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf";
+ Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
+ Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf";
+ Names[RTLIB::OEQ_F32] = "__eqsf2";
+ Names[RTLIB::OEQ_F64] = "__eqdf2";
+ Names[RTLIB::OEQ_F128] = "__eqtf2";
+ Names[RTLIB::OEQ_PPCF128] = "__gcc_qeq";
+ Names[RTLIB::UNE_F32] = "__nesf2";
+ Names[RTLIB::UNE_F64] = "__nedf2";
+ Names[RTLIB::UNE_F128] = "__netf2";
+ Names[RTLIB::UNE_PPCF128] = "__gcc_qne";
+ Names[RTLIB::OGE_F32] = "__gesf2";
+ Names[RTLIB::OGE_F64] = "__gedf2";
+ Names[RTLIB::OGE_F128] = "__getf2";
+ Names[RTLIB::OGE_PPCF128] = "__gcc_qge";
+ Names[RTLIB::OLT_F32] = "__ltsf2";
+ Names[RTLIB::OLT_F64] = "__ltdf2";
+ Names[RTLIB::OLT_F128] = "__lttf2";
+ Names[RTLIB::OLT_PPCF128] = "__gcc_qlt";
+ Names[RTLIB::OLE_F32] = "__lesf2";
+ Names[RTLIB::OLE_F64] = "__ledf2";
+ Names[RTLIB::OLE_F128] = "__letf2";
+ Names[RTLIB::OLE_PPCF128] = "__gcc_qle";
+ Names[RTLIB::OGT_F32] = "__gtsf2";
+ Names[RTLIB::OGT_F64] = "__gtdf2";
+ Names[RTLIB::OGT_F128] = "__gttf2";
+ Names[RTLIB::OGT_PPCF128] = "__gcc_qgt";
+ Names[RTLIB::UO_F32] = "__unordsf2";
+ Names[RTLIB::UO_F64] = "__unorddf2";
+ Names[RTLIB::UO_F128] = "__unordtf2";
+ Names[RTLIB::UO_PPCF128] = "__gcc_qunord";
+ Names[RTLIB::O_F32] = "__unordsf2";
+ Names[RTLIB::O_F64] = "__unorddf2";
+ Names[RTLIB::O_F128] = "__unordtf2";
+ Names[RTLIB::O_PPCF128] = "__gcc_qunord";
+ Names[RTLIB::MEMCPY] = "memcpy";
+ Names[RTLIB::MEMMOVE] = "memmove";
+ Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8";
+ Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8";
+ Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8";
+ Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
+ Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
+
+ Names[RTLIB::ATOMIC_LOAD] = "__atomic_load";
+ Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1";
+ Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2";
+ Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4";
+ Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8";
+ Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16";
+
+ Names[RTLIB::ATOMIC_STORE] = "__atomic_store";
+ Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1";
+ Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2";
+ Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4";
+ Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8";
+ Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16";
+
+ Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange";
+ Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1";
+ Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2";
+ Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4";
+ Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8";
+ Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16";
+
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8";
+ Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16";
+
+ Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1";
+ Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2";
+ Names[RTLIB::ATOMIC_FETCH_ADD_4] = "__atomic_fetch_add_4";
+ Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8";
+ Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16";
+ Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1";
+ Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2";
+ Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4";
+ Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8";
+ Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16";
+ Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1";
+ Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2";
+ Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4";
+ Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8";
+ Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16";
+ Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1";
+ Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2";
+ Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4";
+ Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8";
+ Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16";
+ Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1";
+ Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2";
+ Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4";
+ Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8";
+ Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16";
+ Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1";
+ Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2";
+ Names[RTLIB::ATOMIC_FETCH_NAND_4] = "__atomic_fetch_nand_4";
+ Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8";
+ Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16";
+
+ if (TT.isGNUEnvironment()) {
+ Names[RTLIB::SINCOS_F32] = "sincosf";
+ Names[RTLIB::SINCOS_F64] = "sincos";
+ Names[RTLIB::SINCOS_F80] = "sincosl";
+ Names[RTLIB::SINCOS_F128] = "sincosl";
+ Names[RTLIB::SINCOS_PPCF128] = "sincosl";
+ }
+
+ if (!TT.isOSOpenBSD()) {
+ Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
+ }
+
+ Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize";
+}
+
+/// InitLibcallCallingConvs - Set default libcall CallingConvs.
+///
+static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+ CCs[i] = CallingConv::C;
+ }
+}
+
+/// getFPEXT - Return the FPEXT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::f32)
+ return FPEXT_F16_F32;
+ } else if (OpVT == MVT::f32) {
+ if (RetVT == MVT::f64)
+ return FPEXT_F32_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F32_F128;
+ if (RetVT == MVT::ppcf128)
+ return FPEXT_F32_PPCF128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::f128)
+ return FPEXT_F64_F128;
+ else if (RetVT == MVT::ppcf128)
+ return FPEXT_F64_PPCF128;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPROUND - Return the FPROUND_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
+ if (RetVT == MVT::f16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_F16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F16;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F16;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F16;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F16;
+ } else if (RetVT == MVT::f32) {
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_F32;
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F32;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F32;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F32;
+ } else if (RetVT == MVT::f64) {
+ if (OpVT == MVT::f80)
+ return FPROUND_F80_F64;
+ if (OpVT == MVT::f128)
+ return FPROUND_F128_F64;
+ if (OpVT == MVT::ppcf128)
+ return FPROUND_PPCF128_F64;
+ }
+
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F32_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F32_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F32_I128;
+ } else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F64_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F64_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F64_I128;
+ } else if (OpVT == MVT::f80) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F80_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F80_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F80_I128;
+ } else if (OpVT == MVT::f128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F128_I128;
+ } else if (OpVT == MVT::ppcf128) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_PPCF128_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_PPCF128_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_PPCF128_I128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return SINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return SINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return SINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return SINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return SINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
+ if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I32_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I32_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I32_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I32_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I32_PPCF128;
+ } else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I64_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I64_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I64_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I64_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I64_PPCF128;
+ } else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f32)
+ return UINTTOFP_I128_F32;
+ if (RetVT == MVT::f64)
+ return UINTTOFP_I128_F64;
+ if (RetVT == MVT::f80)
+ return UINTTOFP_I128_F80;
+ if (RetVT == MVT::f128)
+ return UINTTOFP_I128_F128;
+ if (RetVT == MVT::ppcf128)
+ return UINTTOFP_I128_PPCF128;
+ }
+ return UNKNOWN_LIBCALL;
+}
+
+RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
+#define OP_TO_LIBCALL(Name, Enum) \
+ case Name: \
+ switch (VT.SimpleTy) { \
+ default: \
+ return UNKNOWN_LIBCALL; \
+ case MVT::i8: \
+ return Enum##_1; \
+ case MVT::i16: \
+ return Enum##_2; \
+ case MVT::i32: \
+ return Enum##_4; \
+ case MVT::i64: \
+ return Enum##_8; \
+ case MVT::i128: \
+ return Enum##_16; \
+ }
+
+ switch (Opc) {
+ OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
+ OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
+ }
+
+#undef OP_TO_LIBCALL
+
+ return UNKNOWN_LIBCALL;
+}
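+
+// For example, getSYNC(ISD::ATOMIC_LOAD_ADD, MVT::i32) returns
+// SYNC_FETCH_AND_ADD_4, which InitLibcallNames above maps to
+// "__sync_fetch_and_add_4".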
+
+/// InitCmpLibcallCCs - Set default comparison libcall CC.
+///
+static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
+ memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
+ CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
+ CCs[RTLIB::UNE_F32] = ISD::SETNE;
+ CCs[RTLIB::UNE_F64] = ISD::SETNE;
+ CCs[RTLIB::UNE_F128] = ISD::SETNE;
+ CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
+ CCs[RTLIB::OGE_F32] = ISD::SETGE;
+ CCs[RTLIB::OGE_F64] = ISD::SETGE;
+ CCs[RTLIB::OGE_F128] = ISD::SETGE;
+ CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
+ CCs[RTLIB::OLT_F32] = ISD::SETLT;
+ CCs[RTLIB::OLT_F64] = ISD::SETLT;
+ CCs[RTLIB::OLT_F128] = ISD::SETLT;
+ CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
+ CCs[RTLIB::OLE_F32] = ISD::SETLE;
+ CCs[RTLIB::OLE_F64] = ISD::SETLE;
+ CCs[RTLIB::OLE_F128] = ISD::SETLE;
+ CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
+ CCs[RTLIB::OGT_F32] = ISD::SETGT;
+ CCs[RTLIB::OGT_F64] = ISD::SETGT;
+ CCs[RTLIB::OGT_F128] = ISD::SETGT;
+ CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
+ CCs[RTLIB::UO_F32] = ISD::SETNE;
+ CCs[RTLIB::UO_F64] = ISD::SETNE;
+ CCs[RTLIB::UO_F128] = ISD::SETNE;
+ CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
+ CCs[RTLIB::O_F32] = ISD::SETEQ;
+ CCs[RTLIB::O_F64] = ISD::SETEQ;
+ CCs[RTLIB::O_F128] = ISD::SETEQ;
+ CCs[RTLIB::O_PPCF128] = ISD::SETEQ;
+}
+
+/// NOTE: The TargetMachine owns TLOF.
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
+ initActions();
+
+ // Perform these initializations only once.
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ HasMultipleConditionRegisters = false;
+ HasExtractBitsInsn = false;
+ FsqrtIsCheap = false;
+ JumpIsExpensive = JumpIsExpensiveOverride;
+ PredictableSelectIsExpensive = false;
+ MaskAndBranchFoldingIsLegal = false;
+ EnableExtLdPromotion = false;
+ HasFloatingPointExceptions = true;
+ StackPointerRegisterToSaveRestore = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanFloatContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ GatherAllAliasesMaxDepth = 6;
+ MinStackArgumentAlignment = 1;
+ MinimumJumpTableEntries = 4;
+ // TODO: the default will be switched to 0 in the next commit, along
+ // with the Target-specific changes necessary.
+ MaxAtomicSizeInBitsSupported = 1024;
+
+ MinCmpXchgSizeInBits = 0;
+
+ std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);
+
+ InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+void TargetLoweringBase::initActions() {
+ // All operations default to being supported.
+ memset(OpActions, 0, sizeof(OpActions));
+ memset(LoadExtActions, 0, sizeof(LoadExtActions));
+ memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
+ memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
+ memset(CondCodeActions, 0, sizeof(CondCodeActions));
+ std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
+ std::fill(std::begin(TargetDAGCombineArray),
+ std::end(TargetDAGCombineArray), 0);
+
+ // Set default actions for various operations.
+ for (MVT VT : MVT::all_valuetypes()) {
+ // Default all indexed load / store to expand.
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ setIndexedLoadAction(IM, VT, Expand);
+ setIndexedStoreAction(IM, VT, Expand);
+ }
+
+ // Most backends expect to see the node which just returns the value loaded.
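+ // Expanding ATOMIC_CMP_SWAP_WITH_SUCCESS produces a plain ATOMIC_CMP_SWAP
+ // plus a SETCC comparing the loaded value against the expected value.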
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
+
+ // These operations default to expand.
+ setOperationAction(ISD::FGETSIGN, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FMINNAN, VT, Expand);
+ setOperationAction(ISD::FMAXNAN, VT, Expand);
+ setOperationAction(ISD::FMAD, VT, Expand);
+ setOperationAction(ISD::SMIN, VT, Expand);
+ setOperationAction(ISD::SMAX, VT, Expand);
+ setOperationAction(ISD::UMIN, VT, Expand);
+ setOperationAction(ISD::UMAX, VT, Expand);
+
+ // Overflow operations default to expand
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SMULO, VT, Expand);
+ setOperationAction(ISD::UMULO, VT, Expand);
+
+ // These default to Expand so they will be expanded to CTLZ/CTTZ by default.
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+
+ setOperationAction(ISD::BITREVERSE, VT, Expand);
+
+ // These library functions default to expand.
+ setOperationAction(ISD::FROUND, VT, Expand);
+
+ // These operations default to expand for vector types.
+ if (VT.isVector()) {
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
+ }
+
+ // For most targets @llvm.get.dynamic.area.offset just returns 0.
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
+ }
+
+ // Most targets ignore the @llvm.prefetch intrinsic.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
+
+ // Most targets also ignore the @llvm.readcyclecounter intrinsic.
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
+
+ // ConstantFP nodes default to expand. Targets can either change this to
+ // Legal, in which case all fp constants are legal, or use isFPImmLegal()
+ // to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+
+ // These library functions default to expand.
+ for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FLOG , VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP , VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FROUND, VT, Expand);
+ }
+
+ // Default ISD::TRAP to expand (which turns it into abort).
+ setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+ // On most systems there is no difference between DEBUGTRAP and TRAP.
+ // Marking it "Expand" here tells the DAG legalizer to replace DEBUGTRAP
+ // with TRAP.
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+}
+
+MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
+ EVT) const {
+ return MVT::getIntegerVT(8 * DL.getPointerSize(0));
+}
+
+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
+ const DataLayout &DL) const {
+ assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+ if (LHSTy.isVector())
+ return LHSTy;
+ return getScalarShiftAmountTy(DL, LHSTy);
+}
+
+/// canOpTrap - Returns true if the operation can trap for the value type.
+/// VT must be a legal type.
+bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
+ assert(isTypeLegal(VT));
+ switch (Op) {
+ default:
+ return false;
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ return true;
+ }
+}
+
+void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
+ // If the command-line option was specified, ignore this request.
+ if (!JumpIsExpensiveOverride.getNumOccurrences())
+ JumpIsExpensive = isExpensive;
+}
+
+TargetLoweringBase::LegalizeKind
+TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
+ // If this is a simple type, use the ComputeRegisterProp mechanism.
+ if (VT.isSimple()) {
+ MVT SVT = VT.getSimpleVT();
+ assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
+ MVT NVT = TransformToType[SVT.SimpleTy];
+ LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
+
+ assert((LA == TypeLegal || LA == TypeSoftenFloat ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) &&
+ "Promote may not follow Expand or Promote");
+
+ if (LA == TypeSplitVector)
+ return LegalizeKind(LA,
+ EVT::getVectorVT(Context, SVT.getVectorElementType(),
+ SVT.getVectorNumElements() / 2));
+ if (LA == TypeScalarizeVector)
+ return LegalizeKind(LA, SVT.getVectorElementType());
+ return LegalizeKind(LA, NVT);
+ }
+
+ // Handle Extended Scalar Types.
+ if (!VT.isVector()) {
+ assert(VT.isInteger() && "Float types must be simple");
+ unsigned BitSize = VT.getSizeInBits();
+ // First promote to a power-of-two size, then expand if necessary.
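+ // For example, an extended i33 is rounded up to i64 and promoted, while a
+ // type that is already a power of two and at least 8 bits wide, such as
+ // i256, falls through below and is expanded into two i128 halves.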
+ if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
+ EVT NVT = VT.getRoundIntegerType(Context);
+ assert(NVT != VT && "Unable to round integer VT");
+ LegalizeKind NextStep = getTypeConversion(Context, NVT);
+ // Avoid multi-step promotion.
+ if (NextStep.first == TypePromoteInteger)
+ return NextStep;
+ // Return rounded integer type.
+ return LegalizeKind(TypePromoteInteger, NVT);
+ }
+
+ return LegalizeKind(TypeExpandInteger,
+ EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
+ }
+
+ // Handle vector types.
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ // Vectors with only one element are always scalarized.
+ if (NumElts == 1)
+ return LegalizeKind(TypeScalarizeVector, EltVT);
+
+ // Try to widen vector elements until the element type is a power of two and
+ // promote it to a legal type later on, for example:
+ // <3 x i8> -> <4 x i8> -> <4 x i32>
+ if (EltVT.isInteger()) {
+ // Vectors with a number of elements that is not a power of two are always
+ // widened, for example <3 x i8> -> <4 x i8>.
+ if (!VT.isPow2VectorType()) {
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+ EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Examine the element type.
+ LegalizeKind LK = getTypeConversion(Context, EltVT);
+
+ // If type is to be expanded, split the vector.
+ // <4 x i140> -> <2 x i140>
+ if (LK.first == TypeExpandInteger)
+ return LegalizeKind(TypeSplitVector,
+ EVT::getVectorVT(Context, EltVT, NumElts / 2));
+
+ // Promote the integer element types until a legal vector type is found
+ // or until the element integer type is too big. If a legal type was not
+ // found, fallback to the usual mechanism of widening/splitting the
+ // vector.
+ EVT OldEltVT = EltVT;
+ while (1) {
+ // Increase the bitwidth of the element to the next pow-of-two
+ // (which is greater than 8 bits).
+ EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
+ .getRoundIntegerType(Context);
+
+ // Stop trying when getting a non-simple element type.
+ // Note that vector elements may be greater than legal vector element
+ // types. Example: X86 XMM registers hold 64bit element on 32bit
+ // systems.
+ if (!EltVT.isSimple())
+ break;
+
+ // Build a new vector type and check if it is legal.
+ MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ // Found a legal promoted vector type.
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
+ return LegalizeKind(TypePromoteInteger,
+ EVT::getVectorVT(Context, EltVT, NumElts));
+ }
+
+ // Reset the type to the unexpanded type if we did not find a legal vector
+ // type with a promoted vector element type.
+ EltVT = OldEltVT;
+ }
+
+ // Try to widen the vector until a legal type is found.
+ // If there is no wider legal type, split the vector.
+ while (1) {
+ // Round up to the next power of 2.
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+
+ // If there is no simple vector type with this many elements then there
+ // cannot be a larger legal vector type. Note that this assumes that
+ // there are no skipped intermediate vector types in the simple types.
+ if (!EltVT.isSimple())
+ break;
+ MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
+ if (LargerVector == MVT())
+ break;
+
+ // If this type is legal then widen the vector.
+ if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
+ return LegalizeKind(TypeWidenVector, LargerVector);
+ }
+
+ // Widen odd vectors to next power of two.
+ if (!VT.isPow2VectorType()) {
+ EVT NVT = VT.getPow2VectorType(Context);
+ return LegalizeKind(TypeWidenVector, NVT);
+ }
+
+ // Vectors with illegal element types are expanded.
+ EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
+ return LegalizeKind(TypeSplitVector, NVT);
+}
+
+static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT,
+ TargetLoweringBase *TLI) {
+ // Figure out the right, legal destination reg to copy into.
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
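+ // For example, on a hypothetical target where v4i32 is the widest legal
+ // i32 vector, v16i32 is halved twice, yielding four v4i32 intermediates.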
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
+ if (!TLI->isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ MVT DestVT = TLI->getRegisterType(NewVT);
+ RegisterVT = DestVT;
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// Replace/modify any TargetFrameIndex operands with a target-dependent
+/// sequence of memory operands that is recognized by PrologEpilogInserter.
+MachineBasicBlock *
+TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
+ MachineBasicBlock *MBB) const {
+ MachineInstr *MI = &InitialMI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // We're handling multiple types of operands here:
+ // PATCHPOINT MetaArgs - live-in, read only, direct
+ // STATEPOINT Deopt Spill - live-through, read only, indirect
+ // STATEPOINT Deopt Alloca - live-through, read only, direct
+ // (We're currently conservative and mark the deopt slots read/write in
+ // practice.)
+ // STATEPOINT GC Spill - live-through, read/write, indirect
+ // STATEPOINT GC Alloca - live-through, read/write, direct
+ // The live-in vs live-through is handled already (the live through ones are
+ // all stack slots), but we need to handle the different type of stackmap
+ // operands and memory effects here.
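+ // Concretely, each frame-index operand below is rewritten as either
+ //   IndirectMemRefOp, <object size>, <FI>, 0   (statepoint spill slots)
+ // or
+ //   DirectMemRefOp, <FI>, 0                    (patchpoints and allocas).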
+
+ // MI changes inside this loop as we grow operands.
+ for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
+ MachineOperand &MO = MI->getOperand(OperIdx);
+ if (!MO.isFI())
+ continue;
+
+ // foldMemoryOperand builds a new MI after replacing a single FI operand
+ // with the canonical set of five x86 addressing-mode operands.
+ int FI = MO.getIndex();
+ MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
+
+ // Copy operands before the frame-index.
+ for (unsigned i = 0; i < OperIdx; ++i)
+ MIB.addOperand(MI->getOperand(i));
+ // Add frame index operands recognized by stackmaps.cpp
+ if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
+ // indirect-mem-ref tag, size, #FI, offset.
+ // Used for spills inserted by StatepointLowering. This codepath is not
+ // used for patchpoints/stackmaps at all, for these spilling is done via
+ // foldMemoryOperand callback only.
+ assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
+ MIB.addImm(StackMaps::IndirectMemRefOp);
+ MIB.addImm(MFI.getObjectSize(FI));
+ MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.addImm(0);
+ } else {
+ // direct-mem-ref tag, #FI, offset.
+ // Used by patchpoint, and direct alloca arguments to statepoints
+ MIB.addImm(StackMaps::DirectMemRefOp);
+ MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.addImm(0);
+ }
+ // Copy the operands after the frame index.
+ for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ // Inherit previous memory operands.
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
+
+ // Add a new memory operand for this FI.
+ assert(MFI.getObjectOffset(FI) != -1);
+
+ auto Flags = MachineMemOperand::MOLoad;
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+ Flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOVolatile;
+ }
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags,
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
+ MIB->addMemOperand(MF, MMO);
+
+ // Replace the instruction and update the operand index.
+ MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+ OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
+ MI->eraseFromParent();
+ MI = MIB;
+ }
+ return MBB;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+// This function is in TargetLowering because it uses RegClassForVT which would
+// need to be moved to TargetRegisterInfo and would necessitate moving
+// isTypeLegal over as well - a massive change that would just require
+// TargetLowering having a TargetRegisterInfo class member that it would use.
+std::pair<const TargetRegisterClass *, uint8_t>
+TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+
+ // Compute the set of all super-register classes.
+ BitVector SuperRegRC(TRI->getNumRegClasses());
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
+ SuperRegRC.setBitsInMask(RCI.getMask());
+
+ // Find the first legal register class with the largest spill size.
+ const TargetRegisterClass *BestRC = RC;
+ for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
+ const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
+ // We want the largest possible spill size.
+ if (SuperRC->getSize() <= BestRC->getSize())
+ continue;
+ if (!isLegalRC(SuperRC))
+ continue;
+ BestRC = SuperRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+/// computeRegisterProperties - Once all of the register classes are added,
+/// this allows us to compute derived properties we expose.
+void TargetLoweringBase::computeRegisterProperties(
+ const TargetRegisterInfo *TRI) {
+ static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
+ "Too many value types for ValueTypeActions to hold!");
+
+ // Everything defaults to needing one register.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ NumRegistersForVT[i] = 1;
+ RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
+ }
+ // ...except isVoid, which doesn't need any registers.
+ NumRegistersForVT[MVT::isVoid] = 0;
+
+ // Find the largest integer register class.
+ unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
+ for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg)
+ assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
+
+ // Every integer value type larger than this largest register takes twice as
+ // many registers to represent as the previous ValueType.
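+ // For example, if i64 is the largest legal integer type, then i128 takes
+ // two i64 registers.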
+ for (unsigned ExpandedReg = LargestIntReg + 1;
+ ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
+ NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1];
+ RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
+ TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1);
+ ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
+ TypeExpandInteger);
+ }
+
+ // Inspect all of the ValueType's smaller than the largest integer
+ // register to see which ones need promotion.
+ unsigned LegalIntReg = LargestIntReg;
+ for (unsigned IntReg = LargestIntReg - 1;
+ IntReg >= (unsigned)MVT::i1; --IntReg) {
+ MVT IVT = (MVT::SimpleValueType)IntReg;
+ if (isTypeLegal(IVT)) {
+ LegalIntReg = IntReg;
+ } else {
+ RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
+ (const MVT::SimpleValueType)LegalIntReg;
+ ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
+ }
+ }
+
+ // ppcf128 type is really two f64's.
+ if (!isTypeLegal(MVT::ppcf128)) {
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ } else {
+ NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::ppcf128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
+ }
+ }
+
+ // Decide how to handle f128. If the target does not have native f128 support,
+ // expand it to i128 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f128)) {
+ NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::f128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f64. If the target does not have native f64 support,
+ // expand it to i64 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
+ RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
+ TransformToType[MVT::f64] = MVT::i64;
+ ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f32. If the target does not have native f32 support,
+ // expand it to i32 and we will be generating soft float library calls.
+ if (!isTypeLegal(MVT::f32)) {
+ NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
+ RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
+ TransformToType[MVT::f32] = MVT::i32;
+ ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
+ }
+
+ // Decide how to handle f16. If the target does not have native f16 support,
+ // promote it to f32, because there are no f16 library calls (except for
+ // conversions).
+ if (!isTypeLegal(MVT::f16)) {
+ NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::f16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
+ }
+
+ // Loop over all of the vector value types to see which need transformations.
+ for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT VT = (MVT::SimpleValueType) i;
+ if (isTypeLegal(VT))
+ continue;
+
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ bool IsLegalWiderType = false;
+ LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+ switch (PreferredAction) {
+ case TypePromoteInteger: {
+ // Try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall through to the widen-vector method.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ // Promote vectors of integers to vectors with the same number
+ // of elements, with a wider element type.
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() &&
+ SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType)
+ break;
+ }
+ case TypeWidenVector: {
+ // Try to widen the vector.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ if (SVT.getVectorElementType() == EltVT
+ && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType)
+ break;
+ }
+ case TypeSplitVector:
+ case TypeScalarizeVector: {
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+ NumIntermediates, RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ if (PreferredAction == TypeScalarizeVector)
+ ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+ else if (PreferredAction == TypeSplitVector)
+ ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ else
+ // Set type action according to the number of elements.
+ ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector
+ : TypeSplitVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown vector legalization action!");
+ }
+ }
+
+ // Determine the 'representative' register class for each value type.
+ // A representative register class is the largest legal register class for
+ // a group of value types (i.e. one that is not a sub-register class of a
+ // larger legal class). For example, on i386 the representative class for
+ // i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
+}
+
+EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
+ assert(!VT.isVector() && "No default SetCC type for vectors!");
+ return getPointerTy(DL).SimpleTy;
+}
+
+MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
+ return MVT::i32; // return the default value
+}
+
+/// getVectorTypeBreakdown - Vector types are broken down into some number of
+/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
+/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
+/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
+///
+/// This method returns the number of registers needed, and the VT for each
+/// register. It also returns the VT and quantity of the intermediate values
+/// before they are promoted/expanded.
+///
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+ EVT &IntermediateVT,
+ unsigned &NumIntermediates,
+ MVT &RegisterVT) const {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // or a promoted vector type with the same number of elements but wider
+ // elements, then we should convert to that legal vector type.
+ // This handles things like <2 x float> -> <4 x float> and
+ // <4 x i1> -> <4 x i32>.
+ LegalizeTypeAction TA = getTypeAction(Context, VT);
+ if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ EVT RegisterEVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterEVT)) {
+ IntermediateVT = RegisterEVT;
+ RegisterVT = RegisterEVT.getSimpleVT();
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
+ EVT EltTy = VT.getVectorElementType();
+
+ unsigned NumVectorRegs = 1;
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+ // could break down into LHS/RHS like LegalizeDAG does.
+ if (!isPowerOf2_32(NumElts)) {
+ NumVectorRegs = NumElts;
+ NumElts = 1;
+ }
+
+ // Divide the input until we get to a supported size. This will always
+ // end with a scalar if the target doesn't support vectors.
+ while (NumElts > 1 && !isTypeLegal(
+ EVT::getVectorVT(Context, EltTy, NumElts))) {
+ NumElts >>= 1;
+ NumVectorRegs <<= 1;
+ }
+
+ NumIntermediates = NumVectorRegs;
+
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ if (!isTypeLegal(NewVT))
+ NewVT = EltTy;
+ IntermediateVT = NewVT;
+
+ MVT DestVT = getRegisterType(Context, NewVT);
+ RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
+
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
+}
+
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI, const DataLayout &DL) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ Flags.setSExt();
+ else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area. This is the actual
+/// alignment, not its logarithm.
+unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
+ return DL.getABITypeAlignment(Ty);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ unsigned AddrSpace,
+ unsigned Alignment,
+ bool *Fast) const {
+ // Check if the specified alignment is sufficient based on the data layout.
+ // TODO: While using the data layout works in practice, a better solution
+ // would be to implement this check directly (make this a virtual function).
+ // For example, the ABI alignment may change based on software platform while
+ // this function should only be affected by hardware implementation.
+ Type *Ty = VT.getTypeForEVT(Context);
+ if (Alignment >= DL.getABITypeAlignment(Ty)) {
+ // Assume that an access that meets the ABI-specified alignment is fast.
+ if (Fast != nullptr)
+ *Fast = true;
+ return true;
+ }
+
+ // This is a misaligned access.
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
+}
+
+BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
+ return BranchProbability(MinPercentageForPredictableBranch, 100);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetTransformInfo Helpers
+//===----------------------------------------------------------------------===//
+
+int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
+ enum InstructionOpcodes {
+#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
+#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
+#include "llvm/IR/Instruction.def"
+ };
+ switch (static_cast<InstructionOpcodes>(Opcode)) {
+ case Ret: return 0;
+ case Br: return 0;
+ case Switch: return 0;
+ case IndirectBr: return 0;
+ case Invoke: return 0;
+ case Resume: return 0;
+ case Unreachable: return 0;
+ case CleanupRet: return 0;
+ case CatchRet: return 0;
+ case CatchPad: return 0;
+ case CatchSwitch: return 0;
+ case CleanupPad: return 0;
+ case Add: return ISD::ADD;
+ case FAdd: return ISD::FADD;
+ case Sub: return ISD::SUB;
+ case FSub: return ISD::FSUB;
+ case Mul: return ISD::MUL;
+ case FMul: return ISD::FMUL;
+ case UDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
+ case FDiv: return ISD::FDIV;
+ case URem: return ISD::UREM;
+ case SRem: return ISD::SREM;
+ case FRem: return ISD::FREM;
+ case Shl: return ISD::SHL;
+ case LShr: return ISD::SRL;
+ case AShr: return ISD::SRA;
+ case And: return ISD::AND;
+ case Or: return ISD::OR;
+ case Xor: return ISD::XOR;
+ case Alloca: return 0;
+ case Load: return ISD::LOAD;
+ case Store: return ISD::STORE;
+ case GetElementPtr: return 0;
+ case Fence: return 0;
+ case AtomicCmpXchg: return 0;
+ case AtomicRMW: return 0;
+ case Trunc: return ISD::TRUNCATE;
+ case ZExt: return ISD::ZERO_EXTEND;
+ case SExt: return ISD::SIGN_EXTEND;
+ case FPToUI: return ISD::FP_TO_UINT;
+ case FPToSI: return ISD::FP_TO_SINT;
+ case UIToFP: return ISD::UINT_TO_FP;
+ case SIToFP: return ISD::SINT_TO_FP;
+ case FPTrunc: return ISD::FP_ROUND;
+ case FPExt: return ISD::FP_EXTEND;
+ case PtrToInt: return ISD::BITCAST;
+ case IntToPtr: return ISD::BITCAST;
+ case BitCast: return ISD::BITCAST;
+ case AddrSpaceCast: return ISD::ADDRSPACECAST;
+ case ICmp: return ISD::SETCC;
+ case FCmp: return ISD::SETCC;
+ case PHI: return 0;
+ case Call: return 0;
+ case Select: return ISD::SELECT;
+ case UserOp1: return 0;
+ case UserOp2: return 0;
+ case VAArg: return 0;
+ case ExtractElement: return ISD::EXTRACT_VECTOR_ELT;
+ case InsertElement: return ISD::INSERT_VECTOR_ELT;
+ case ShuffleVector: return ISD::VECTOR_SHUFFLE;
+ case ExtractValue: return ISD::MERGE_VALUES;
+ case InsertValue: return ISD::MERGE_VALUES;
+ case LandingPad: return 0;
+ }
+
+ llvm_unreachable("Unknown instruction type encountered!");
+}
+
+std::pair<int, MVT>
+TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
+ Type *Ty) const {
+ LLVMContext &C = Ty->getContext();
+ EVT MTy = getValueType(DL, Ty);
+
+ int Cost = 1;
+ // We keep legalizing the type until we find a legal kind. We assume that
+ // the only operation that costs anything is the split. After splitting
+ // we need to handle two types.
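+ // For example, on a hypothetical target where v2i32 is the widest legal
+ // i32 vector, v8i32 is split twice (v8i32 -> v4i32 -> v2i32), giving a
+ // cost of 4 with a legalized type of v2i32.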
+ while (true) {
+ LegalizeKind LK = getTypeConversion(C, MTy);
+
+ if (LK.first == TypeLegal)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+ Cost *= 2;
+
+ // Do not loop with f128 type.
+ if (MTy == LK.second)
+ return std::make_pair(Cost, MTy.getSimpleVT());
+
+ // Keep legalizing the type.
+ MTy = LK.second;
+ }
+}
+
+Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!TM.getTargetTriple().isAndroid())
+ return nullptr;
+
+ // Android provides a libc function to retrieve the address of the current
+ // thread's unsafe stack pointer.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+ Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
+ StackPtrTy->getPointerTo(0), nullptr);
+ return IRB.CreateCall(Fn);
+}
+
+//===----------------------------------------------------------------------===//
+// Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ // The default implementation supports a conservative RISC-style r+r and
+ // r+i addressing mode.
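+ // For example, "r", "r+i", "r+r", and "2*r" are accepted below, while
+ // "r+r+i" and "n*r" for n > 2 are rejected.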
+
+ // Allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default: // Don't allow n * r
+ return false;
+ }
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Protector
+//===----------------------------------------------------------------------===//
+
+// For OpenBSD return its special guard variable. Otherwise return nullptr,
+// so that SelectionDAG handles SSP.
+Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
+ if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
+ Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
+ PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
+ auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy));
+ Guard->setVisibility(GlobalValue::HiddenVisibility);
+ return Guard;
+ }
+ return nullptr;
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
+ M.getOrInsertGlobal("__stack_chk_guard", Type::getInt8PtrTy(M.getContext()));
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
+ return M.getGlobalVariable("__stack_chk_guard", true);
+}
+
+Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
new file mode 100644
index 000000000000..5f814c957e92
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -0,0 +1,1098 @@
+//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+using namespace dwarf;
+
+//===----------------------------------------------------------------------===//
+// ELF
+//===----------------------------------------------------------------------===//
+
+MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
+ const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ MachineModuleInfo *MMI) const {
+ unsigned Encoding = getPersonalityEncoding();
+ if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
+ return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
+ TM.getSymbol(GV, Mang)->getName());
+ if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+ return TM.getSymbol(GV, Mang);
+ report_fatal_error("We do not support this DWARF encoding yet!");
+}
+
+void TargetLoweringObjectFileELF::emitPersonalityValue(
+ MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const {
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbolELF *Label =
+ cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
+ Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
+ Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
+ MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
+ ELF::SHT_PROGBITS, Flags, 0);
+ unsigned Size = DL.getPointerSize();
+ Streamer.SwitchSection(Sec);
+ Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
+ Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
+ const MCExpr *E = MCConstantExpr::create(Size, getContext());
+ Streamer.emitELFSize(Label, E);
+ Streamer.EmitLabel(Label);
+
+ Streamer.EmitSymbolValue(Sym, Size);
+}
+
+const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
+ const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+ const TargetMachine &TM, MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const {
+
+ if (Encoding & dwarf::DW_EH_PE_indirect) {
+ MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM);
+
+ // Add information about the stub reference to ELFMMI so that the stub
+ // gets emitted by the asmprinter.
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer);
+}
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+ // N.B.: The defaults used here are not the same ones used in MC.
+ // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
+ // both gas and MC will produce a section with no flags. Given
+ // section(".eh_frame") gcc will produce:
+ //
+ // .section .eh_frame,"a",@progbits
+
+ if (Name == getInstrProfCoverageSectionName(false))
+ return SectionKind::getMetadata();
+
+ if (Name.empty() || Name[0] != '.') return K;
+
+ // Some lame default implementation based on some magic section names.
+ if (Name == ".bss" ||
+ Name.startswith(".bss.") ||
+ Name.startswith(".gnu.linkonce.b.") ||
+ Name.startswith(".llvm.linkonce.b.") ||
+ Name == ".sbss" ||
+ Name.startswith(".sbss.") ||
+ Name.startswith(".gnu.linkonce.sb.") ||
+ Name.startswith(".llvm.linkonce.sb."))
+ return SectionKind::getBSS();
+
+ if (Name == ".tdata" ||
+ Name.startswith(".tdata.") ||
+ Name.startswith(".gnu.linkonce.td.") ||
+ Name.startswith(".llvm.linkonce.td."))
+ return SectionKind::getThreadData();
+
+ if (Name == ".tbss" ||
+ Name.startswith(".tbss.") ||
+ Name.startswith(".gnu.linkonce.tb.") ||
+ Name.startswith(".llvm.linkonce.tb."))
+ return SectionKind::getThreadBSS();
+
+ return K;
+}
+
+
+static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+
+ if (Name == ".init_array")
+ return ELF::SHT_INIT_ARRAY;
+
+ if (Name == ".fini_array")
+ return ELF::SHT_FINI_ARRAY;
+
+ if (Name == ".preinit_array")
+ return ELF::SHT_PREINIT_ARRAY;
+
+ if (K.isBSS() || K.isThreadBSS())
+ return ELF::SHT_NOBITS;
+
+ return ELF::SHT_PROGBITS;
+}
+
+static unsigned getELFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+ if (!K.isMetadata())
+ Flags |= ELF::SHF_ALLOC;
+
+ if (K.isText())
+ Flags |= ELF::SHF_EXECINSTR;
+
+ if (K.isWriteable())
+ Flags |= ELF::SHF_WRITE;
+
+ if (K.isThreadLocal())
+ Flags |= ELF::SHF_TLS;
+
+ if (K.isMergeableCString() || K.isMergeableConst())
+ Flags |= ELF::SHF_MERGE;
+
+ if (K.isMergeableCString())
+ Flags |= ELF::SHF_STRINGS;
+
+ return Flags;
+}
+
+static const Comdat *getELFComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
+MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ StringRef SectionName = GV->getSection();
+
+ // Infer section flags from the section name if we can.
+ Kind = getELFKindForNamedSection(SectionName, Kind);
+
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (const Comdat *C = getELFComdat(GV)) {
+ Group = C->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+ return getContext().getELFSection(SectionName,
+ getELFSectionType(SectionName, Kind), Flags,
+ /*EntrySize=*/0, Group);
+}
+
+/// Return the section prefix name used by options FunctionsSections and
+/// DataSections.
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isReadOnly())
+ return ".rodata";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadData())
+ return ".tdata";
+ if (Kind.isThreadBSS())
+ return ".tbss";
+ if (Kind.isData())
+ return ".data";
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return ".data.rel.ro";
+}
+
+static MCSectionELF *
+selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ unsigned EntrySize = 0;
+ if (Kind.isMergeableCString()) {
+ if (Kind.isMergeable2ByteCString()) {
+ EntrySize = 2;
+ } else if (Kind.isMergeable4ByteCString()) {
+ EntrySize = 4;
+ } else {
+ EntrySize = 1;
+ assert(Kind.isMergeable1ByteCString() && "unknown string width");
+ }
+ } else if (Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4()) {
+ EntrySize = 4;
+ } else if (Kind.isMergeableConst8()) {
+ EntrySize = 8;
+ } else if (Kind.isMergeableConst16()) {
+ EntrySize = 16;
+ } else {
+ assert(Kind.isMergeableConst32() && "unknown data width");
+ EntrySize = 32;
+ }
+ }
+
+ StringRef Group = "";
+ if (const Comdat *C = getELFComdat(GV)) {
+ Flags |= ELF::SHF_GROUP;
+ Group = C->getName();
+ }
+
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name;
+ if (Kind.isMergeableCString()) {
+ // We also need alignment here.
+ // FIXME: this is getting the alignment of the character, not the
+ // alignment of the global!
+ unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV));
+
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ Name = SizeSpec + utostr(Align);
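+ // e.g. ".rodata.str1.1" for 1-byte string entries with preferred
+ // alignment 1.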
+ } else if (Kind.isMergeableConst()) {
+ Name = ".rodata.cst";
+ Name += utostr(EntrySize);
+ } else {
+ Name = getSectionPrefixForGlobal(Kind);
+ }
+ // FIXME: Extend the section prefix to include hotness categories such as
+ // .hot or .unlikely for functions.
+
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GV, Mang, true);
+ }
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
+ }
+ return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group, UniqueID);
+}
+
+MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
+
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GV->hasComdat();
+
+ return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ EmitUniqueSection, Flags, &NextUniqueID);
+}
+
+MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+ Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC,
+ &NextUniqueID);
+}
+
+bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+}
+
+/// Given a mergeable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return MergeableConst4Section;
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return MergeableConst8Section;
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return MergeableConst16Section;
+ if (Kind.isMergeableConst32() && MergeableConst32Section)
+ return MergeableConst32Section;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return DataRelROSection;
+}
+
+static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray,
+ bool IsCtor, unsigned Priority,
+ const MCSymbol *KeySym) {
+ std::string Name;
+ unsigned Type;
+ unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE;
+ StringRef COMDAT = KeySym ? KeySym->getName() : "";
+
+ if (KeySym)
+ Flags |= ELF::SHF_GROUP;
+
+ if (UseInitArray) {
+ if (IsCtor) {
+ Type = ELF::SHT_INIT_ARRAY;
+ Name = ".init_array";
+ } else {
+ Type = ELF::SHT_FINI_ARRAY;
+ Name = ".fini_array";
+ }
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(Priority);
+ }
+ } else {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
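+ // For example, a constructor with init priority 101 is placed in
+ // ".ctors.65434" (65535 - 101).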
+ if (IsCtor)
+ Name = ".ctors";
+ else
+ Name = ".dtors";
+ if (Priority != 65535) {
+ Name += '.';
+ Name += utostr(65535 - Priority);
+ }
+ Type = ELF::SHT_PROGBITS;
+ }
+
+ return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT);
+}
+
+MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getStaticStructorSection(getContext(), UseInitArray, true, Priority,
+ KeySym);
+}
+
+MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getStaticStructorSection(getContext(), UseInitArray, false, Priority,
+ KeySym);
+}
+
+const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic sanity checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()),
+ getContext());
+}
+
+void
+TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
+ UseInitArray = UseInitArray_;
+ if (!UseInitArray)
+ return;
+
+ StaticCtorSection = getContext().getELFSection(
+ ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticDtorSection = getContext().getELFSection(
+ ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile() {
+ SupportIndirectSymViaGOTPCRel = true;
+}
+
+/// emitModuleFlags - Perform code emission for module flags.
+void TargetLoweringObjectFileMachO::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler &Mang, const TargetMachine &TM) const {
+ unsigned VersionVal = 0;
+ unsigned ImageInfoFlags = 0;
+ MDNode *LinkerOptions = nullptr;
+ StringRef SectionVal;
+
+ for (const auto &MFE : ModuleFlags) {
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ Metadata *Val = MFE.Val;
+
+ if (Key == "Objective-C Image Info Version") {
+ VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue();
+ } else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only" ||
+ Key == "Objective-C Is Simulated" ||
+ Key == "Objective-C Class Properties" ||
+ Key == "Objective-C Image Swift Version") {
+ ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue();
+ } else if (Key == "Objective-C Image Info Section") {
+ SectionVal = cast<MDString>(Val)->getString();
+ } else if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ }
+ }
+
+ // Emit the linker options if present.
+ if (LinkerOptions) {
+ for (const auto &Option : LinkerOptions->operands()) {
+ SmallVector<std::string, 4> StrOptions;
+ for (const auto &Piece : cast<MDNode>(Option)->operands())
+ StrOptions.push_back(cast<MDString>(Piece)->getString());
+ Streamer.EmitLinkerOptions(StrOptions);
+ }
+ }
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty()) return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty())
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section + "': " +
+ ErrorCode + ".");
+
+ // Get the section.
+ MCSectionMachO *S = getContext().getMachOSection(
+ Segment, Section, TAA, StubSize, SectionKind::getData());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(getContext().
+ getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(VersionVal, 4);
+ Streamer.EmitIntValue(ImageInfoFlags, 4);
+ Streamer.AddBlankLine();
+}
+
+static void checkMachOComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return;
+
+ report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() +
+ "' cannot be lowered.");
+}
+
+MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // Parse the section specifier and create it if valid.
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+
+ checkMachOComdat(GV);
+
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty()) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' has an invalid section specifier '" +
+ GV->getSection() + "': " + ErrorCode + ".");
+ }
+
+ // Get the section.
+ MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind);
+
+ // If TAA wasn't set by ParseSectionSpecifier() above,
+ // use the value returned by getMachOSection() as a default.
+ if (!TAAParsed)
+ TAA = S->getTypeAndAttributes();
+
+ // Okay, now that we got the section, verify that the TAA & StubSize agree.
+ // If the user declared multiple globals with different section flags, we need
+ // to reject it here.
+ if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
+ }
+
+ return S;
+}
+
+MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ checkMachOComdat(GV);
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
+ if (Kind.isText())
+ return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+ // If this is weak/linkonce, put this in a coalescable section, either in text
+ // or data depending on if it is writable.
+ if (GV->isWeakForLinker()) {
+ if (Kind.isReadOnly())
+ return ConstTextCoalSection;
+ return DataCoalSection;
+ }
+
+ // FIXME: Alignment check should be handled by section classifier.
+ if (Kind.isMergeable1ByteCString() &&
+ GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
+ return CStringSection;
+
+ // Do not put 16-bit arrays in the UString section if they have an
+ // externally visible label; this runs into issues with certain linker
+ // versions.
+ if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+ GV->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GV)) < 32)
+ return UStringSection;
+
+ // With MachO, only variables whose corresponding symbol starts with 'l' or
+ // 'L' can be merged, so we only try merging GVs with private linkage.
+ if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) {
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16())
+ return SixteenByteConstantSection;
+ }
+
+ // Otherwise, if it is readonly, but not something we can specially optimize,
+ // just drop it in .const.
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
+ // If this is marked const, put it into a const section. But if the dynamic
+ // linker needs to write to it, put it in the data segment.
+ if (Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ // Put zero initialized globals with strong external linkage in the
+ // __DATA,__common section with the .zerofill directive.
+ if (Kind.isBSSExtern())
+ return DataCommonSection;
+
+ // Put zero initialized globals with local linkage in the __DATA,__bss
+ // section with the .zerofill directive (aka .lcomm).
+ if (Kind.isBSSLocal())
+ return DataBSSSection;
+
+ // Otherwise, just drop the variable in the normal data section.
+ return DataSection;
+}
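+
+// A rough sketch of the classification above (the section names are the ones
+// Darwin targets usually configure elsewhere; treat them as assumptions):
+//
+//   weak_odr function                    -> __TEXT,__textcoal_nt
+//   private 4-byte mergeable constant    -> __TEXT,__literal4
+//   zero-initialized internal global     -> __DATA,__bss   (.zerofill)
+//   initialized external global          -> __DATA,__data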
+
+MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
+ // If this constant requires a relocation, we have to put it in the data
+ // segment, not in the text segment.
+ if (Kind.isData() || Kind.isReadOnlyWithRel())
+ return ConstDataSection;
+
+ if (Kind.isMergeableConst4())
+ return FourByteConstantSection;
+ if (Kind.isMergeableConst8())
+ return EightByteConstantSection;
+ if (Kind.isMergeableConst16())
+ return SixteenByteConstantSection;
+ return ReadOnlySection; // .const
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
+ const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+ const TargetMachine &TM, MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MCSymbol *SSym =
+ getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return TargetLoweringObjectFile::
+ getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
+ Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ }
+
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang,
+ TM, MMI, Streamer);
+}
+
+MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
+ const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ MachineModuleInfo *MMI) const {
+ // The mach-o version of this method defaults to returning a stub reference.
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+ if (!StubSym.getPointer()) {
+ MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+ }
+
+ return SSym;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+ // as 64-bit targets do, we replace the GOT equivalent by accessing the final
+ // symbol through a non_lazy_ptr stub instead. One advantage is that it allows
+ // the computation of deltas to final external symbols. Example:
+ //
+ // _extgotequiv:
+ // .long _extfoo
+ //
+ // _delta:
+ // .long _extgotequiv-_delta
+ //
+ // is transformed to:
+ //
+ // _delta:
+ // .long L_extfoo$non_lazy_ptr-(_delta+0)
+ //
+ // .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ // L_extfoo$non_lazy_ptr:
+ // .indirect_symbol _extfoo
+ // .long 0
+ //
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MCContext &Ctx = getContext();
+
+ // The offset must consider the original displacement from the base symbol
+ // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+ Offset = -MV.getConstant();
+ const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+ // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+ // non_lazy_ptr stubs.
+ SmallString<128> Name;
+ StringRef Suffix = "$non_lazy_ptr";
+ Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix();
+ Name += Sym->getName();
+ Name += Suffix;
+ MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+
+ const MCExpr *BSymExpr =
+ MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ const MCExpr *LHS =
+ MCSymbolRefExpr::create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!Offset)
+ return MCBinaryExpr::createSub(LHS, BSymExpr, Ctx);
+
+ const MCExpr *RHS =
+ MCBinaryExpr::createAdd(BSymExpr, MCConstantExpr::create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+}
+
+static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
+ const MCSection &Section) {
+ if (!AsmInfo.isSectionAtomizableBySymbols(Section))
+ return true;
+
+ // If it is not dead stripped, it is safe to use private labels.
+ const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
+ if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
+ return true;
+
+ return false;
+}
+
+void TargetLoweringObjectFileMachO::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+ const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM);
+ bool CannotUsePrivateLabel =
+ !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+static unsigned
+getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) {
+ unsigned Flags = 0;
+ bool isThumb = TM.getTargetTriple().getArch() == Triple::thumb;
+
+ if (K.isMetadata())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_CNT_CODE |
+ (isThumb ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0);
+ else if (K.isBSS())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly() || K.isReadOnlyWithRel())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
+}
+
+static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ assert(C && "expected GV to have a Comdat!");
+
+ StringRef ComdatGVName = C->getName();
+ const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName);
+ if (!ComdatGV)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' does not exist.");
+
+ if (ComdatGV->getComdat() != C)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' is not a key for its COMDAT.");
+
+ return ComdatGV;
+}
+
+static int getSelectionForCOFF(const GlobalValue *GV) {
+ if (const Comdat *C = GV->getComdat()) {
+ const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
+ if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
+ ComdatKey = GA->getBaseObject();
+ if (ComdatKey == GV) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ case Comdat::ExactMatch:
+ return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
+ case Comdat::Largest:
+ return COFF::IMAGE_COMDAT_SELECT_LARGEST;
+ case Comdat::NoDuplicates:
+ return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ case Comdat::SameSize:
+ return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
+ }
+ } else {
+ return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+ }
+ }
+ return 0;
+}
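+
+// Worked example (a sketch, not part of the upstream sources): for IR such as
+//
+//   $foo = comdat any
+//   @foo = global i32 0, comdat
+//
+// @foo is its own COMDAT key, so getSelectionForCOFF returns
+// IMAGE_COMDAT_SELECT_ANY; a global in the same comdat whose key is a
+// different value gets IMAGE_COMDAT_SELECT_ASSOCIATIVE instead.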
+
+MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ int Selection = 0;
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
+ StringRef Name = GV->getSection();
+ StringRef COMDATSymName = "";
+ if (GV->hasComdat()) {
+ Selection = getSelectionForCOFF(GV);
+ const GlobalValue *ComdatGV;
+ if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ COMDATSymName = Sym->getName();
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ } else {
+ Selection = 0;
+ }
+ }
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ Selection);
+}
+
+static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
+ if (Kind.isText())
+ return ".text";
+ if (Kind.isBSS())
+ return ".bss";
+ if (Kind.isThreadLocal())
+ return ".tls$";
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
+ return ".rdata";
+ return ".data";
+}
+
+MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // If we have -ffunction-sections then we should emit the global value to a
+ // uniqued section specifically for it.
+ bool EmitUniquedSection;
+ if (Kind.isText())
+ EmitUniquedSection = TM.getFunctionSections();
+ else
+ EmitUniquedSection = TM.getDataSections();
+
+ if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) {
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
+
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ int Selection = getSelectionForCOFF(GV);
+ if (!Selection)
+ Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ const GlobalValue *ComdatGV;
+ if (GV->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniquedSection)
+ UniqueID = NextUniqueID++;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ StringRef COMDATSymName = Sym->getName();
+ return getContext().getCOFFSection(Name, Characteristics, Kind,
+ COMDATSymName, Selection, UniqueID);
+ } else {
+ SmallString<256> TmpData;
+ Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true);
+ return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ Selection, UniqueID);
+ }
+ }
+
+ if (Kind.isText())
+ return TextSection;
+
+ if (Kind.isThreadLocal())
+ return TLSDataSection;
+
+ if (Kind.isReadOnly() || Kind.isReadOnlyWithRel())
+ return ReadOnlySection;
+
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon())
+ return BSSSection;
+
+ return DataSection;
+}
+
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ bool CannotUsePrivateLabel = false;
+ if (GV->hasPrivateLinkage() &&
+ ((isa<Function>(GV) && TM.getFunctionSections()) ||
+ (isa<GlobalVariable>(GV) && TM.getDataSections())))
+ CannotUsePrivateLabel = true;
+
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ // FIXME: we should produce a symbol for F instead.
+ if (F.hasPrivateLinkage())
+ return ReadOnlySection;
+
+ MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ StringRef COMDATSymName = Sym->getName();
+
+ SectionKind Kind = SectionKind::getReadOnly();
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ unsigned UniqueID = NextUniqueID++;
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
+}
+
+void TargetLoweringObjectFileCOFF::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler &Mang, const TargetMachine &TM) const {
+ MDNode *LinkerOptions = nullptr;
+
+ for (const auto &MFE : ModuleFlags) {
+ StringRef Key = MFE.Key->getString();
+ if (Key == "Linker Options")
+ LinkerOptions = cast<MDNode>(MFE.Val);
+ }
+
+ if (LinkerOptions) {
+ // Emit the linker options to the linker .drectve section. According to the
+ // spec, this section is a space-separated string containing flags for the
+ // linker.
+ MCSection *Sec = getDrectveSection();
+ Streamer.SwitchSection(Sec);
+ for (const auto &Option : LinkerOptions->operands()) {
+ for (const auto &Piece : cast<MDNode>(Option)->operands()) {
+ // Lead with a space for consistency with our dllexport implementation.
+ std::string Directive(" ");
+ Directive.append(cast<MDString>(Piece)->getString());
+ Streamer.EmitBytes(Directive);
+ }
+ }
+ }
+}
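+
+// Sketch of the expected input/output (module-flag encoding assumed to be the
+// usual clang one): a module carrying
+//
+//   !llvm.module.flags = !{!0}
+//   !0 = !{i32 6, !"Linker Options", !1}
+//   !1 = !{!2}
+//   !2 = !{!"/DEFAULTLIB:msvcrt.lib"}
+//
+// makes the loop above switch to .drectve and emit the bytes
+// " /DEFAULTLIB:msvcrt.lib", which the linker later parses as a flag.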
+
+MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getContext().getAssociativeCOFFSection(
+ cast<MCSectionCOFF>(StaticCtorSection), KeySym, 0);
+}
+
+MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getContext().getAssociativeCOFFSection(
+ cast<MCSectionCOFF>(StaticDtorSection), KeySym, 0);
+}
+
+void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
+ raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const {
+ if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
+ return;
+
+ const Triple &TT = getTargetTriple();
+
+ if (TT.isKnownWindowsMSVCEnvironment())
+ OS << " /EXPORT:";
+ else
+ OS << " -export:";
+
+ if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
+ std::string Flag;
+ raw_string_ostream FlagOS(Flag);
+ Mang.getNameWithPrefix(FlagOS, GV, false);
+ FlagOS.flush();
+ if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
+ OS << Flag.substr(1);
+ else
+ OS << Flag;
+ } else {
+ Mang.getNameWithPrefix(OS, GV, false);
+ }
+
+ if (!GV->getValueType()->isFunctionTy()) {
+ if (TT.isKnownWindowsMSVCEnvironment())
+ OS << ",DATA";
+ else
+ OS << ",data";
+ }
+}
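+
+// Worked example (illustrative; the exact mangling depends on the target): a
+// global such as
+//
+//   @exported_var = dllexport global i32 0
+//
+// produces roughly " /EXPORT:exported_var,DATA" in an MSVC environment and
+// " -export:exported_var,data" under MinGW/Cygwin; functions omit the DATA
+// suffix.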
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 000000000000..8d2048fa047f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,49 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate all frame pointers.
+ if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF))
+ return true;
+
+ // Check to see if we should eliminate non-leaf frame pointers.
+ if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf"))
+ return MF.getFrameInfo()->hasCalls();
+
+ return false;
+}
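+
+// Example IR that exercises the non-leaf path above (a sketch; the attribute
+// spelling is the string checked in the code):
+//
+//   declare void @g()
+//   define void @f() #0 {
+//     call void @g()
+//     ret void
+//   }
+//   attributes #0 = { "no-frame-pointer-elim-non-leaf" }
+//
+// Because @f contains a call, hasCalls() is true and the frame pointer is
+// kept for it.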
+
+/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (default), the
+/// code generator is not allowed to generate mad (multiply add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
new file mode 100644
index 000000000000..b8c820942cb5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -0,0 +1,888 @@
+//===-- TargetPassConfig.cpp - Target independent code generation passes --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> DisableBlockPlacement("disable-block-placement",
+ cl::Hidden, cl::desc("Disable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden,
+ cl::desc("Disable Early If-conversion"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault> OptimizeRegAlloc(
+ "optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting",
+ cl::Hidden, cl::desc("Disable ConstantHoisting"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
+ cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
+static cl::opt<bool> EnableImplicitNullChecks(
+ "enable-implicit-null-checks",
+ cl::desc("Fold null checks into faulting memory operations"),
+ cl::init(false));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(false),
+ cl::ZeroOrMore);
+
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+ cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"));
+
+// Temporary option to allow experimenting with MachineScheduler as a post-RA
+// scheduler. Targets can "properly" enable this with
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
+// Targets can return true in targetSchedulesPostRAScheduling() and
+// insert a PostRA scheduling pass wherever they want.
+cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
+ cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
+// Experimental option to use CFL-AA in codegen
+enum class CFLAAType { None, Steensgaard, Andersen, Both };
+static cl::opt<CFLAAType> UseCFLAA(
+ "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
+ cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
+ cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"),
+ clEnumValN(CFLAAType::Steensgaard, "steens",
+ "Enable unification-based CFL-AA"),
+ clEnumValN(CFLAAType::Andersen, "anders",
+ "Enable inclusion-based CFL-AA"),
+ clEnumValN(CFLAAType::Both, "both",
+ "Enable both variants of CFL-AA"),
+ clEnumValEnd));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing.
+/// i.e. -disable-mypass=false has no effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
+static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
+ bool Override) {
+ if (Override)
+ return IdentifyingPassPtr();
+ return PassID;
+}
+
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID in the case that the target
+/// directly adds its own pass, but in that case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static IdentifyingPassPtr overridePass(AnalysisID StandardID,
+ IdentifyingPassPtr TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableBlockPlacement);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, DisableEarlyIfConversion);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
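+
+// A minimal sketch of how a target plugs into this (the class name is made
+// up; the substitution itself is the one suggested in the MISchedPostRA
+// comment above):
+//
+//   class MyTargetPassConfig : public TargetPassConfig {
+//   public:
+//     MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
+//         : TargetPassConfig(TM, PM) {
+//       // Use the post-RA MachineScheduler instead of the old post-RA list
+//       // scheduler; -disable-post-ra still suppresses it via overridePass().
+//       substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
+//     }
+//   };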
+
+//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace {
+struct InsertedPass {
+ AnalysisID TargetPassID;
+ IdentifyingPassPtr InsertedPassID;
+ bool VerifyAfter;
+ bool PrintAfter;
+
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter, bool PrintAfter)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
+ VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {}
+
+ Pass *getInsertedPass() const {
+ assert(InsertedPassID.isValid() && "Illegal Pass ID!");
+ if (InsertedPassID.isInstance())
+ return InsertedPassID.getInstance();
+ Pass *NP = Pass::createPass(InsertedPassID.getID());
+ assert(NP && "Pass ID not registered");
+ return NP;
+ }
+};
+}
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overriding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
+ DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses;
+
+ /// Store pairs of <TargetPassID, InsertedPassID>; the second pass of each
+ /// pair is inserted after every instance of the first one.
+ SmallVector<InsertedPass, 4> InsertedPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
+ StopAfter(nullptr), Started(true), Stopped(false),
+ AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
+ DisableVerify(false), EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Also register alias analysis passes required by codegen passes.
+ initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+ substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ if (StringRef(PrintMachineInstrs.getValue()).equals(""))
+ TM->Options.PrintMachineCode = true;
+}
+
+CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
+ return TM->getOptLevel();
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID,
+ IdentifyingPassPtr InsertedPassID,
+ bool VerifyAfter, bool PrintAfter) {
+ assert(((!InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getID()) ||
+ (InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
+ "Insert a pass after itself!");
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter,
+ PrintAfter);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID), PM(nullptr) {
+ llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
+void TargetPassConfig::substitutePass(AnalysisID StandardID,
+ IdentifyingPassPtr TargetID) {
+ Impl->TargetPasses[StandardID] = TargetID;
+}
+
+IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
+
+bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
+ IdentifyingPassPtr TargetID = getPassSubstitution(ID);
+ IdentifyingPassPtr FinalPtr = overridePass(ID, TargetID);
+ return !FinalPtr.isValid() || FinalPtr.isInstance() ||
+ FinalPtr.getID() != ID;
+}
+
+/// Add a pass to the PassManager if that pass is supposed to be run. If the
+/// Started/Stopped flags indicate either that the compilation should start at
+/// a later pass or that it should stop after an earlier pass, then do not add
+/// the pass. Finally, compare the current pass against the StartAfter
+/// and StopAfter options and change the Started/Stopped flags accordingly.
+void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
+ assert(!Initialized && "PassConfig is immutable");
+
+ // Cache the Pass ID here in case the pass manager finds this pass is
+ // redundant with ones already scheduled / available, and deletes it.
+ // Fundamentally, once we add the pass to the manager, we no longer own it
+ // and shouldn't reference it.
+ AnalysisID PassID = P->getPassID();
+
+ if (StartBefore == PassID)
+ Started = true;
+ if (Started && !Stopped) {
+ std::string Banner;
+ // Construct banner message before PM->add() as that may delete the pass.
+ if (AddingMachinePasses && (printAfter || verifyAfter))
+ Banner = std::string("After ") + std::string(P->getPassName());
+ PM->add(P);
+ if (AddingMachinePasses) {
+ if (printAfter)
+ addPrintPass(Banner);
+ if (verifyAfter)
+ addVerifyPass(Banner);
+ }
+
+ // Add the passes after the pass P if there are any.
+ for (auto IP : Impl->InsertedPasses) {
+ if (IP.TargetPassID == PassID)
+ addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter);
+ }
+ } else {
+ delete P;
+ }
+ if (StopAfter == PassID)
+ Stopped = true;
+ if (StartAfter == PassID)
+ Started = true;
+ if (Stopped && !Started)
+ report_fatal_error("Cannot stop compilation after pass that is not run");
+}
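+
+// The Started/Stopped bookkeeping above is what backs llc's pipeline slicing
+// options (spelling assumed to match the llc of this vintage), e.g.:
+//
+//   llc -stop-after=machine-scheduler foo.ll
+//
+// Passes outside the started/stopped window are deleted instead of being
+// added to the PassManager.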
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+///
+/// addPass cannot return a pointer to the pass instance because it is internal
+/// to the PassManager and the instance we create here may already be freed.
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
+ bool printAfter) {
+ IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
+ IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
+ if (!FinalPtr.isValid())
+ return nullptr;
+
+ Pass *P;
+ if (FinalPtr.isInstance())
+ P = FinalPtr.getInstance();
+ else {
+ P = Pass::createPass(FinalPtr.getID());
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ }
+ AnalysisID FinalID = P->getPassID();
+ addPass(P, verifyAfter, printAfter); // Ends the lifetime of P.
+
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const std::string &Banner) {
+ addPrintPass(Banner);
+ addVerifyPass(Banner);
+}
+
+void TargetPassConfig::addPrintPass(const std::string &Banner) {
+ if (TM->shouldPrintMachineCode())
+ PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
+}
+
+void TargetPassConfig::addVerifyPass(const std::string &Banner) {
+ if (VerifyMachineCode)
+ PM->add(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ addPass(createCFLSteensAAWrapperPass());
+ break;
+ case CFLAAType::Andersen:
+ addPass(createCFLAndersAAWrapperPass());
+ break;
+ case CFLAAType::Both:
+ addPass(createCFLAndersAAWrapperPass());
+ addPass(createCFLSteensAAWrapperPass());
+ break;
+ default:
+ break;
+ }
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ addPass(createTypeBasedAAWrapperPass());
+ addPass(createScopedNoAliasAAWrapperPass());
+ addPass(createBasicAAWrapperPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+ addPass(createLoopStrengthReducePass());
+ if (PrintLSR)
+ addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
+ }
+
+ // Run GC lowering passes for builtin collectors
+ // TODO: add a pass insertion point here
+ addPass(createGCLoweringPass());
+ addPass(createShadowStackGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ addPass(createUnreachableBlockEliminationPass());
+
+ // Prepare expensive constants for SelectionDAG.
+ if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
+ addPass(createConstantHoistingPass());
+
+ if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
+ addPass(createPartiallyInlineLibCallsPass());
+}
+
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+void TargetPassConfig::addPassesToHandleExceptions() {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+ // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both.
+ // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ addPass(createSjLjEHPreparePass());
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ addPass(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
+ addPass(createWinEHPass(TM));
+ addPass(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ addPass(createLowerInvokePass());
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ addPass(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ addPass(createCodeGenPreparePass(TM));
+ addPass(createRewriteSymbolsPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ addPreISel();
+
+ // Force codegen to run according to the callgraph.
+ if (TM->Options.EnableIPRA)
+ addPass(new DummyCGSCCPass);
+
+ // Add both the safe stack and the stack protection passes: each of them will
+ // only protect functions that have corresponding attributes.
+ addPass(createSafeStackPass(TM));
+ addPass(createStackProtectorPass(TM));
+
+ if (PrintISelInput)
+ addPass(createPrintFunctionPass(
+ dbgs(), "\n\n*** Final LLVM Code input to ISel ***\n"));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ addPass(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
+///
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
+///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
+void TargetPassConfig::addMachinePasses() {
+ AddingMachinePasses = true;
+
+ if (TM->Options.EnableIPRA)
+ addPass(createRegUsageInfoPropPass());
+
+ // Insert a machine instr printer pass after the specified pass.
+ if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
+ !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
+ const PassRegistry *PR = PassRegistry::getPassRegistry();
+ const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue());
+ const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
+ assert (TPI && IPI && "Pass ID not registered!");
+ const char *TID = (const char *)(TPI->getTypeInfo());
+ const char *IID = (const char *)(IPI->getTypeInfo());
+ insertPass(TID, IID);
+ }
+
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ addPass(&ExpandISelPseudosID);
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID, false);
+ }
+
+ // Run pre-ra passes.
+ addPreRegAlloc();
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc(createRegAllocPass(true));
+ else
+ addFastRegAlloc(createRegAllocPass(false));
+
+ // Run post-ra passes.
+ addPostRegAlloc();
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&ShrinkWrapID);
+
+ // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only
+ // do so if it hasn't been disabled, substituted, or overridden.
+ if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID))
+ addPass(createPrologEpilogInserterPass(TM));
+
+ /// Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(&ExpandPostRAPseudosID);
+
+ // Run pre-sched2 passes.
+ addPreSched2();
+
+ if (EnableImplicitNullChecks)
+ addPass(&ImplicitNullChecksID);
+
+ // Second pass scheduler.
+ // Let Target optionally insert this pass by itself at some other
+ // point.
+ if (getOptLevel() != CodeGenOpt::None &&
+ !TM->targetSchedulesPostRAScheduling()) {
+ if (MISchedPostRA)
+ addPass(&PostMachineSchedulerID);
+ else
+ addPass(&PostRASchedulerID);
+ }
+
+ // GC
+ if (addGCPasses()) {
+ if (PrintGCInfo)
+ addPass(createGCInfoPrinter(dbgs()), false, false);
+ }
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ addPreEmitPass();
+
+ if (TM->Options.EnableIPRA)
+ // Collect register usage information and produce a register mask of
+ // clobbered registers, to be used to optimize call sites.
+ addPass(createRegUsageInfoCollector());
+
+ addPass(&FuncletLayoutID, false);
+
+ addPass(&StackMapLivenessID, false);
+ addPass(&LiveDebugValuesID, false);
+
+ addPass(&XRayInstrumentationID, false);
+ addPass(&PatchableFunctionID, false);
+
+ AddingMachinePasses = false;
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ addPass(&EarlyTailDuplicateID);
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID, false);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID, false);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID, false);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(&DeadMachineInstructionElimID);
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ addILPOpts();
+
+ addPass(&MachineLICMID, false);
+ addPass(&MachineCSEID, false);
+ addPass(&MachineSinkingID);
+
+ addPass(&PeepholeOptimizerID);
+ // Clean-up the dead code that may have been generated by peephole
+ // rewriting.
+ addPass(&DeadMachineInstructionElimID);
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
+//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+LLVM_DEFINE_ONCE_FLAG(InitializeDefaultRegisterAllocatorFlag);
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ useDefaultRegisterAllocator);
+
+/// -regalloc=... command line option.
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
+
+static void initializeDefaultRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = RegAlloc;
+ RegisterRegAlloc::setDefault(RegAlloc);
+ }
+}
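+
+// Adding another allocator is a single registration at namespace scope; the
+// names below are hypothetical:
+//
+//   static RegisterRegAlloc
+//     myRegAlloc("myalloc", "my experimental register allocator",
+//                createMyRegisterAllocator);
+//
+// After that, -regalloc=myalloc selects it through the Registry above and
+// takes precedence over createTargetRegisterAllocator().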
+
+
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
+///
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+/// defined as separate passes because they may require different analyses.
+///
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs,
+/// this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
+ // Initialize the global default.
+ llvm::call_once(InitializeDefaultRegisterAllocatorFlag,
+ initializeDefaultRegisterAllocatorOnce);
+
+ RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
+
+/// Return true if the default global register allocator is in use and
+/// has not been overridden on the command line with '-regalloc=...'
+bool TargetPassConfig::usingDefaultRegAlloc() const {
+ return RegAlloc.getNumOccurrences() == 0;
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&PHIEliminationID, false);
+ addPass(&TwoAddressInstructionPassID, false);
+
+ if (RegAllocPass)
+ addPass(RegAllocPass);
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&DetectDeadLanesID, false);
+
+ addPass(&ProcessImplicitDefsID, false);
+
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ addPass(&LiveVariablesID, false);
+
+ // Edge splitting is smarter with machine loop info.
+ addPass(&MachineLoopInfoID, false);
+ addPass(&PHIEliminationID, false);
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID, false);
+
+ addPass(&TwoAddressInstructionPassID, false);
+ addPass(&RegisterCoalescerID);
+
+ // The machine scheduler may accidentally create disconnected components
+ // when moving subregister definitions around; avoid this by splitting them
+ // into separate vregs beforehand. Splitting can also improve register
+ // allocation quality.
+ addPass(&RenameIndependentSubregsID);
+
+ // PreRA instruction scheduling.
+ addPass(&MachineSchedulerID);
+
+ if (RegAllocPass) {
+ // Add the selected register allocation pass.
+ addPass(RegAllocPass);
+
+ // Allow targets to change the register assignments before rewriting.
+ addPreRewrite();
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(&PostRAMachineLICMID);
+ }
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ addPass(&BranchFolderPassID);
+
+ // Tail duplication.
+ // Note that duplicating the tail just increases code size and degrades
+ // performance for targets that require Structured Control Flow.
+ // In addition it can also make the CFG irreducible. Thus we disable it.
+ if (!TM->requiresStructuredCFG())
+ addPass(&TailDuplicateID);
+
+ // Copy propagation.
+ addPass(&MachineCopyPropagationID);
+}
+
+/// Add standard GC passes.
+bool TargetPassConfig::addGCPasses() {
+ addPass(&GCMachineCodeAnalysisID, false);
+ return true;
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ if (addPass(&MachineBlockPlacementID)) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(&MachineBlockPlacementStatsID);
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
new file mode 100644
index 000000000000..e1d90cb913e5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -0,0 +1,399 @@
+//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetRegisterInfo interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "target-reg-info"
+
+using namespace llvm;
+
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
+ regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *SRINames,
+ const unsigned *SRILaneMasks,
+ unsigned SRICoveringLanes)
+ : InfoDesc(ID), SubRegIndexNames(SRINames),
+ SubRegIndexLaneMasks(SRILaneMasks),
+ RegClassBegin(RCB), RegClassEnd(RCE),
+ CoveringLanes(SRICoveringLanes) {
+}
+
+TargetRegisterInfo::~TargetRegisterInfo() {}
+
+namespace llvm {
+
+Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
+ unsigned SubIdx) {
+ return Printable([Reg, TRI, SubIdx](raw_ostream &OS) {
+ if (!Reg)
+ OS << "%noreg";
+ else if (TargetRegisterInfo::isStackSlot(Reg))
+ OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
+ else if (TargetRegisterInfo::isVirtualRegister(Reg))
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg);
+ else if (TRI && Reg < TRI->getNumRegs())
+ OS << '%' << TRI->getName(Reg);
+ else
+ OS << "%physreg" << Reg;
+ if (SubIdx) {
+ if (TRI)
+ OS << ':' << TRI->getSubRegIndexName(SubIdx);
+ else
+ OS << ":sub(" << SubIdx << ')';
+ }
+ });
+}
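+
+// Example outputs (a sketch): PrintReg(0) prints "%noreg",
+// PrintReg(TargetRegisterInfo::index2VirtReg(5)) prints "%vreg5", and passing
+// a TRI plus a sub-register index appends ":<subregname>" (or ":sub(N)"
+// without a TRI).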
+
+Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ // Generic printout when TRI is missing.
+ if (!TRI) {
+ OS << "Unit~" << Unit;
+ return;
+ }
+
+ // Check for invalid register units.
+ if (Unit >= TRI->getNumRegUnits()) {
+ OS << "BadUnit~" << Unit;
+ return;
+ }
+
+ // Normal units have at least one root.
+ MCRegUnitRootIterator Roots(Unit, TRI);
+ assert(Roots.isValid() && "Unit has no roots.");
+ OS << TRI->getName(*Roots);
+ for (++Roots; Roots.isValid(); ++Roots)
+ OS << '~' << TRI->getName(*Roots);
+ });
+}
+
+Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
+ return Printable([Unit, TRI](raw_ostream &OS) {
+ if (TRI && TRI->isVirtualRegister(Unit)) {
+ OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+ } else {
+ OS << PrintRegUnit(Unit, TRI);
+ }
+ });
+}
+
+Printable PrintLaneMask(LaneBitmask LaneMask) {
+ return Printable([LaneMask](raw_ostream &OS) {
+ OS << format("%08X", LaneMask);
+ });
+}
+
+} // End of llvm namespace
+
+/// getAllocatableClass - Return the maximal subclass of the given register
+/// class that is allocatable, or NULL.
+const TargetRegisterClass *
+TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
+ if (!RC || RC->isAllocatable())
+ return RC;
+
+ for (BitMaskClassIterator It(RC->getSubClassMask(), *this); It.isValid();
+ ++It) {
+ const TargetRegisterClass *SubRC = getRegClass(It.getID());
+ if (SubRC->isAllocatable())
+ return SubRC;
+ }
+ return nullptr;
+}
+
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the smallest sub register class of
+/// the right type that contains this physreg.
+const TargetRegisterClass *
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
+ assert(isPhysicalRegister(reg) && "reg must be a physical register");
+
+ // Pick the smallest sub register class of the right type that contains
+ // this physreg.
+ const TargetRegisterClass* BestRC = nullptr;
+ for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
+ const TargetRegisterClass* RC = *I;
+ if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
+ (!BestRC || BestRC->hasSubClass(RC)))
+ BestRC = RC;
+ }
+
+ assert(BestRC && "Couldn't find the register class");
+ return BestRC;
+}
+
+/// getAllocatableSetForRC - Set the bits that represent allocatable
+/// registers for the specified register class.
+static void getAllocatableSetForRC(const MachineFunction &MF,
+ const TargetRegisterClass *RC, BitVector &R){
+ assert(RC->isAllocatable() && "invalid for nonallocatable sets");
+ ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
+}
+
+BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
+ const TargetRegisterClass *RC) const {
+ BitVector Allocatable(getNumRegs());
+ if (RC) {
+ // A register class with no allocatable subclass returns an empty set.
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC);
+ if (SubClass)
+ getAllocatableSetForRC(MF, SubClass, Allocatable);
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ if ((*I)->isAllocatable())
+ getAllocatableSetForRC(MF, *I, Allocatable);
+ }
+
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable &= Reserved.flip();
+
+ return Allocatable;
+}
+
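+// firstCommonClass scans the sub-class bit masks one 32-bit word at a time and
+// returns the register class of the first common bit found. Since register
+// classes are numbered topologically, that is the largest common sub-class
+// (optionally restricted to classes that support the requested value type).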
+static inline
+const TargetRegisterClass *firstCommonClass(const uint32_t *A,
+ const uint32_t *B,
+ const TargetRegisterInfo *TRI,
+ const MVT::SimpleValueType SVT =
+ MVT::SimpleValueType::Any) {
+ const MVT VT(SVT);
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
+ if (unsigned Common = *A++ & *B++) {
+ const TargetRegisterClass *RC =
+ TRI->getRegClass(I + countTrailingZeros(Common));
+ if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT))
+ return RC;
+ }
+ return nullptr;
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ const MVT::SimpleValueType SVT) const {
+ // First take care of the trivial cases.
+ if (A == B)
+ return A;
+ if (!A || !B)
+ return nullptr;
+
+  // Register classes are ordered topologically, so the largest common
+  // sub-class is the common sub-class with the smallest ID.
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT);
+}
+
+const TargetRegisterClass *
+TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+ const TargetRegisterClass *B,
+ unsigned Idx) const {
+ assert(A && B && "Missing register class");
+ assert(Idx && "Bad sub-register index");
+
+ // Find Idx in the list of super-register indices.
+ for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI)
+ if (RCI.getSubReg() == Idx)
+ // The bit mask contains all register classes that are projected into B
+ // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
+ return nullptr;
+}
+
+const TargetRegisterClass *TargetRegisterInfo::
+getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
+ const TargetRegisterClass *RCB, unsigned SubB,
+ unsigned &PreA, unsigned &PreB) const {
+ assert(RCA && SubA && RCB && SubB && "Invalid arguments");
+
+ // Search all pairs of sub-register indices that project into RCA and RCB
+ // respectively. This is quadratic, but usually the sets are very small. On
+ // most targets like X86, there will only be a single sub-register index
+ // (e.g., sub_16bit projecting into GR16).
+ //
+ // The worst case is a register class like DPR on ARM.
+ // We have indices dsub_0..dsub_7 projecting into that class.
+ //
+ // It is very common that one register class is a sub-register of the other.
+ // Arrange for RCA to be the larger register so the answer will be found in
+ // the first iteration. This makes the search linear for the most common
+ // case.
+ const TargetRegisterClass *BestRC = nullptr;
+ unsigned *BestPreA = &PreA;
+ unsigned *BestPreB = &PreB;
+ if (RCA->getSize() < RCB->getSize()) {
+ std::swap(RCA, RCB);
+ std::swap(SubA, SubB);
+ std::swap(BestPreA, BestPreB);
+ }
+
+  // Also terminate the search once we have found a register class as small as
+  // RCA.
+ unsigned MinSize = RCA->getSize();
+
+ for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
+ unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
+ for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) {
+ // Check if a common super-register class exists for this index pair.
+ const TargetRegisterClass *RC =
+ firstCommonClass(IA.getMask(), IB.getMask(), this);
+ if (!RC || RC->getSize() < MinSize)
+ continue;
+
+ // The indexes must compose identically: PreA+SubA == PreB+SubB.
+ unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB);
+ if (FinalA != FinalB)
+ continue;
+
+ // Is RC a better candidate than BestRC?
+ if (BestRC && RC->getSize() >= BestRC->getSize())
+ continue;
+
+ // Yes, RC is the smallest super-register seen so far.
+ BestRC = RC;
+ *BestPreA = IA.getSubReg();
+ *BestPreB = IB.getSubReg();
+
+ // Bail early if we reached MinSize. We won't find a better candidate.
+ if (BestRC->getSize() == MinSize)
+ return BestRC;
+ }
+ }
+ return BestRC;
+}
+
+/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
+/// share the same register file.
+static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) {
+ // Same register class.
+ if (DefRC == SrcRC)
+ return true;
+
+ // Both operands are sub registers. Check if they share a register class.
+ unsigned SrcIdx, DefIdx;
+ if (SrcSubReg && DefSubReg) {
+ return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
+ SrcIdx, DefIdx) != nullptr;
+ }
+
+  // At most one of the registers is a sub-register; make it Src to avoid
+  // duplicating the test.
+ if (!SrcSubReg) {
+ std::swap(DefSubReg, SrcSubReg);
+ std::swap(DefRC, SrcRC);
+ }
+
+  // One of the registers is a sub-register; check if we can get a superclass.
+ if (SrcSubReg)
+ return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
+
+ // Plain copy.
+ return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
+bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // If this source does not incur a cross register bank copy, use it.
+ return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg);
+}
+
+// Compute target-independent register allocator hints to help eliminate copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+ ArrayRef<MCPhysReg> Order,
+ SmallVectorImpl<MCPhysReg> &Hints,
+ const MachineFunction &MF,
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+ // Hints with HintType != 0 were set by target-dependent code.
+ // Such targets must provide their own implementation of
+ // TRI::getRegAllocationHints to interpret those hint types.
+ assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+ // Target-independent hints are either a physical or a virtual register.
+ unsigned Phys = Hint.second;
+ if (VRM && isVirtualRegister(Phys))
+ Phys = VRM->getPhys(Phys);
+
+ // Check that Phys is a valid hint in VirtReg's register class.
+ if (!isPhysicalRegister(Phys))
+ return;
+ if (MRI.isReserved(Phys))
+ return;
+ // Check that Phys is in the allocation order. We shouldn't heed hints
+ // from VirtReg's register class if they aren't in the allocation order. The
+ // target probably has a reason for removing the register.
+ if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ return;
+
+ // All clear, tell the register allocator to prefer this register.
+ Hints.push_back(Phys);
+}
+
+bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ return !MF.getFunction()->hasFnAttribute("no-realign-stack");
+}
+
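+// Stack realignment is requested when the frame's maximum object alignment
+// exceeds the target stack alignment, when the function carries an explicit
+// alignstack attribute, or when the "stackrealign" string attribute is set;
+// the request is only honored if the stack can actually be realigned.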
+bool TargetRegisterInfo::needsStackRealignment(
+ const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = TFI->getStackAlignment();
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttribute(Attribute::StackAlignment));
+ if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
+ if (canRealignStack(MF))
+ return true;
+ DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n");
+ }
+ return false;
+}
+
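+// Return true if all bits that are set in mask0 are also set in mask1, i.e.
+// the set of registers preserved by mask0 is a subset of those preserved by
+// mask1.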
+bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
+ const uint32_t *mask1) const {
+ unsigned N = (getNumRegs()+31) / 32;
+ for (unsigned I = 0; I < N; ++I)
+ if ((mask0[I] & mask1[I]) != mask0[I])
+ return false;
+ return true;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void
+TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
+ dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
+}
+#endif
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
new file mode 100644
index 000000000000..022e912aa84f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -0,0 +1,300 @@
+//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a wrapper around MCSchedModel that allows the interface
+// to benefit from information currently only available in TargetInstrInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
+bool TargetSchedModel::hasInstrSchedModel() const {
+ return EnableSchedModel && SchedModel.hasInstrSchedModel();
+}
+
+bool TargetSchedModel::hasInstrItineraries() const {
+ return EnableSchedItins && !InstrItins.isEmpty();
+}
+
+static unsigned gcd(unsigned Dividend, unsigned Divisor) {
+ // Dividend and Divisor will be naturally swapped as needed.
+  while (Divisor) {
+    unsigned Rem = Dividend % Divisor;
+    Dividend = Divisor;
+    Divisor = Rem;
+  }
+ return Dividend;
+}
+static unsigned lcm(unsigned A, unsigned B) {
+ unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
+ assert((LCM >= A && LCM >= B) && "LCM overflow");
+ return LCM;
+}
+
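+// init() precomputes resource factors so resource usage can be compared with
+// plain integers: ResourceLCM is the least common multiple of the issue width
+// and all resource unit counts. For example, with IssueWidth 2 and resources
+// providing 1 and 3 units, ResourceLCM = lcm(lcm(2, 1), 3) = 6, MicroOpFactor
+// = 6 / 2 = 3, and the per-resource factors are 6 and 2.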
+void TargetSchedModel::init(const MCSchedModel &sm,
+ const TargetSubtargetInfo *sti,
+ const TargetInstrInfo *tii) {
+ SchedModel = sm;
+ STI = sti;
+ TII = tii;
+ STI->initInstrItins(InstrItins);
+
+ unsigned NumRes = SchedModel.getNumProcResourceKinds();
+ ResourceFactors.resize(NumRes);
+ ResourceLCM = SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ if (NumUnits > 0)
+ ResourceLCM = lcm(ResourceLCM, NumUnits);
+ }
+ MicroOpFactor = ResourceLCM / SchedModel.IssueWidth;
+ for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+ unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits;
+ ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0;
+ }
+}
+
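+// Itinerary data takes precedence: a negative itinerary count defers to the
+// TargetInstrInfo hook. Otherwise the count comes from the resolved sched
+// class of the per-operand model. Failing both, transient instructions such
+// as COPY are considered free and everything else is one micro-op.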
+unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrItineraries()) {
+ int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass());
+ return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, *MI);
+ }
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->NumMicroOps;
+ }
+ return MI->isTransient() ? 0 : 1;
+}
+
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned capLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
+/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
+/// evaluation of predicates that depend on instruction operands or flags.
+const MCSchedClassDesc *TargetSchedModel::
+resolveSchedClass(const MachineInstr *MI) const {
+
+ // Get the definition's scheduling class descriptor from this machine model.
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ return SCDesc;
+
+#ifndef NDEBUG
+ unsigned NIter = 0;
+#endif
+ while (SCDesc->isVariant()) {
+ assert(++NIter < 6 && "Variants are nested deeper than the magic number");
+
+ SchedClass = STI->resolveSchedClass(SchedClass, MI, this);
+ SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ }
+ return SCDesc;
+}
+
+/// Find the def index of this operand. This index maps to the machine model and
+/// is independent of use operands. Def operands may be reordered with uses or
+/// merged with uses without affecting the def index (e.g. before/after
+/// regalloc). However, an instruction's def operands must never be reordered
+/// with respect to each other.
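+///
+/// For example, for an instruction with operands (%d0<def>, %u0, %d1<def>),
+/// the def index of operand 2 is 1: only the def at operand 0 is counted, and
+/// the use sitting between the two defs is ignored.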
+static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
+ unsigned DefIdx = 0;
+ for (unsigned i = 0; i != DefOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ ++DefIdx;
+ }
+ return DefIdx;
+}
+
+/// Find the use index of this operand. This is independent of the instruction's
+/// def operands.
+///
+/// Note that uses are not determined by the operand's isUse property, which
+/// is simply the inverse of isDef. Here we consider any readsReg operand to be
+/// a "use". The machine model allows an operand to be both a Def and Use.
+static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
+ unsigned UseIdx = 0;
+ for (unsigned i = 0; i != UseOperIdx; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.readsReg())
+ ++UseIdx;
+ }
+ return UseIdx;
+}
+
+// Top-level API for clients that know the operand indices.
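+// The latency is taken from the itinerary data when available, otherwise from
+// the per-operand machine model (the def's write latency minus any read
+// advance of the use), and falls back to TII->defaultDefLatency() when
+// neither model provides an answer.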
+unsigned TargetSchedModel::computeOperandLatency(
+ const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *UseMI, unsigned UseOperIdx) const {
+
+ if (!hasInstrSchedModel() && !hasInstrItineraries())
+ return TII->defaultDefLatency(SchedModel, *DefMI);
+
+ if (hasInstrItineraries()) {
+ int OperLatency = 0;
+ if (UseMI) {
+ OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
+ *UseMI, UseOperIdx);
+ }
+ else {
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx);
+ }
+ if (OperLatency >= 0)
+ return OperLatency;
+
+ // No operand latency was found.
+ unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI);
+
+ // Expected latency is the max of the stage latency and itinerary props.
+ // Rather than directly querying InstrItins stage latency, we call a TII
+ // hook to allow subtargets to specialize latency. This hook is only
+ // applicable to the InstrItins model. InstrSchedModel should model all
+ // special cases without TII hooks.
+ InstrLatency =
+ std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI));
+ return InstrLatency;
+ }
+ // hasInstrSchedModel()
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
+ if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(SCDesc, DefIdx);
+ unsigned WriteID = WLEntry->WriteResourceID;
+ unsigned Latency = capLatency(WLEntry->Cycles);
+ if (!UseMI)
+ return Latency;
+
+ // Lookup the use's latency adjustment in SubtargetInfo.
+ const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
+ if (UseDesc->NumReadAdvanceEntries == 0)
+ return Latency;
+ unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
+ int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+ if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
+ return 0;
+ return Latency - Advance;
+ }
+ // If DefIdx does not exist in the model (e.g. implicit defs), then return
+ // unit latency (defaultDefLatency may be too conservative).
+#ifndef NDEBUG
+ if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
+ && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
+ && SchedModel.isComplete()) {
+ errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
+ << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
+ llvm_unreachable("incomplete machine model");
+ }
+#endif
+ // FIXME: Automatically giving all implicit defs defaultDefLatency is
+ // undesirable. We should only do it for defs that are known to the MC
+ // desc like flags. Truly implicit defs should get 1 cycle latency.
+ return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI);
+}
+
+unsigned
+TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const {
+ unsigned Latency = 0;
+ for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries;
+ DefIdx != DefEnd; ++DefIdx) {
+ // Lookup the definition's write latency in SubtargetInfo.
+ const MCWriteLatencyEntry *WLEntry =
+ STI->getWriteLatencyEntry(&SCDesc, DefIdx);
+ Latency = std::max(Latency, capLatency(WLEntry->Cycles));
+ }
+ return Latency;
+}
+
+unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
+ assert(hasInstrSchedModel() && "Only call this function with a SchedModel");
+
+ unsigned SCIdx = TII->get(Opcode).getSchedClass();
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx);
+
+ if (SCDesc->isValid() && !SCDesc->isVariant())
+ return computeInstrLatency(*SCDesc);
+
+ llvm_unreachable("No MI sched latency");
+}
+
+unsigned
+TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
+ bool UseDefaultDefLatency) const {
+ // For the itinerary model, fall back to the old subtarget hook.
+ // Allow subtargets to compute Bundle latencies outside the machine model.
+ if (hasInstrItineraries() || MI->isBundle() ||
+ (!hasInstrSchedModel() && !UseDefaultDefLatency))
+ return TII->getInstrLatency(&InstrItins, *MI);
+
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
+ if (SCDesc->isValid())
+ return computeInstrLatency(*SCDesc);
+ }
+ return TII->defaultDefLatency(SchedModel, *MI);
+}
+
+unsigned TargetSchedModel::
+computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
+ const MachineInstr *DepMI) const {
+ if (!SchedModel.isOutOfOrder())
+ return 1;
+
+ // Out-of-order processor can dispatch WAW dependencies in the same cycle.
+
+ // Treat predication as a data dependency for out-of-order cpus. In-order
+ // cpus do not need to treat predicated writes specially.
+ //
+ // TODO: The following hack exists because predication passes do not
+ // correctly append imp-use operands, and readsReg() strangely returns false
+ // for predicated defs.
+ unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ const MachineFunction &MF = *DefMI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
+ return computeInstrLatency(DefMI);
+
+  // If we have a per-operand scheduling model, check if this def is writing
+  // an unbuffered resource. If so, it is treated like an in-order cpu.
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
+ if (SCDesc->isValid()) {
+ for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
+ *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
+ if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
new file mode 100644
index 000000000000..3d9a51864b6c
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -0,0 +1,1806 @@
+//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TwoAddress instruction pass which is used
+// by most register allocators. Two-Address instructions are rewritten
+// from:
+//
+// A = B op C
+//
+// to:
+//
+// A = B
+// A op= C
+//
+// Note that if a register allocator chooses to use this pass, it has to be
+// capable of handling the non-SSA nature of these rewritten virtual
+// registers.
+//
+// It is also worth noting that the duplicate operand of the two
+// address instruction is removed.
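+//
+// For example (operand flags omitted for brevity), a two-address X86 add
+//
+//   %vreg2 = ADD32rr %vreg0, %vreg1
+//
+// becomes
+//
+//   %vreg2 = COPY %vreg0
+//   %vreg2 = ADD32rr %vreg2, %vreg1
+//
+// tying the destination to the first source operand.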
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "twoaddrinstr"
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
+
+// Temporary flag to disable rescheduling.
+static cl::opt<bool>
+EnableRescheduling("twoaddr-reschedule",
+ cl::desc("Coalesce copies by rescheduling (default=true)"),
+ cl::init(true), cl::Hidden);
+
+namespace {
+class TwoAddressInstructionPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ MachineRegisterInfo *MRI;
+ LiveVariables *LV;
+ LiveIntervals *LIS;
+ AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
+
+ // The current basic block being processed.
+ MachineBasicBlock *MBB;
+
+  // Keep track of the distance of an MI from the start of the current
+  // basic block.
+ DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+ // Set of already processed instructions in the current block.
+ SmallPtrSet<MachineInstr*, 8> Processed;
+
+  // A map from virtual registers to physical registers which are likely
+  // targets to be coalesced to, due to copies from physical registers to
+  // virtual registers. e.g. v1024 = move r0.
+ DenseMap<unsigned, unsigned> SrcRegMap;
+
+  // A map from virtual registers to physical registers which are likely
+  // targets to be coalesced to, due to copies to physical registers from
+  // virtual registers. e.g. r1 = move v1024.
+ DenseMap<unsigned, unsigned> DstRegMap;
+
+ bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
+ MachineBasicBlock::iterator OldPos);
+
+ bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
+ bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
+
+ bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist);
+
+ bool commuteInstruction(MachineInstr *MI,
+ unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
+
+ bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+
+ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB, unsigned Dist);
+
+ bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
+
+ bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+
+ bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute);
+
+ bool tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist);
+ void scanUses(unsigned DstReg);
+
+ void processCopy(MachineInstr *MI);
+
+ typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList;
+ typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
+ bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
+ void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
+ void eliminateRegSequence(MachineBasicBlock::iterator&);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addUsedIfAvailable<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ /// Pass entry point.
+ bool runOnMachineFunction(MachineFunction&) override;
+};
+} // end anonymous namespace
+
+char TwoAddressInstructionPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+
+/// A two-address instruction has been converted to a three-address instruction
+/// to avoid clobbering a register. Try to sink it past the instruction that
+/// would kill the above mentioned register to reduce register pressure.
+bool TwoAddressInstructionPass::
+sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
+ MachineBasicBlock::iterator OldPos) {
+ // FIXME: Shouldn't we be trying to do this before we three-addressify the
+ // instruction? After this transformation is done, we no longer need
+ // the instruction to be in three-address form.
+
+ // Check if it's safe to move this instruction.
+ bool SeenStore = true; // Be conservative.
+ if (!MI->isSafeToMove(AA, SeenStore))
+ return false;
+
+ unsigned DefReg = 0;
+ SmallSet<unsigned, 4> UseRegs;
+
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse() && MOReg != SavedReg)
+ UseRegs.insert(MO.getReg());
+ if (!MO.isDef())
+ continue;
+ if (MO.isImplicit())
+ // Don't try to move it if it implicitly defines a register.
+ return false;
+ if (DefReg)
+ // For now, don't move any instructions that define multiple registers.
+ return false;
+ DefReg = MO.getReg();
+ }
+
+ // Find the instruction that kills SavedReg.
+ MachineInstr *KillMI = nullptr;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(SavedReg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ }
+ if (!KillMI) {
+ for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) {
+ if (!UseMO.isKill())
+ continue;
+ KillMI = UseMO.getParent();
+ break;
+ }
+ }
+
+ // If we find the instruction that kills SavedReg, and it is in an
+ // appropriate location, we can try to sink the current instruction
+ // past it.
+ if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
+ MachineBasicBlock::iterator(KillMI) == OldPos || KillMI->isTerminator())
+ return false;
+
+ // If any of the definitions are used by another instruction between the
+ // position and the kill use, then it's not safe to sink it.
+ //
+ // FIXME: This can be sped up if there is an easy way to query whether an
+ // instruction is before or after another instruction. Then we can use
+ // MachineRegisterInfo def / use instead.
+ MachineOperand *KillMO = nullptr;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+
+ unsigned NumVisited = 0;
+ for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) {
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI.isDebugValue())
+ continue;
+ if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ for (unsigned i = 0, e = OtherMI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = OtherMI.getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (DefReg == MOReg)
+ return false;
+
+ if (MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))) {
+ if (&OtherMI == KillMI && MOReg == SavedReg)
+ // Save the operand that kills the register. We want to unset the kill
+ // marker if we can sink MI past it.
+ KillMO = &MO;
+ else if (UseRegs.count(MOReg))
+ // One of the uses is killed before the destination.
+ return false;
+ }
+ }
+ }
+ assert(KillMO && "Didn't find kill");
+
+ if (!LIS) {
+ // Update kill and LV information.
+ KillMO->setIsKill(false);
+ KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+ KillMO->setIsKill(true);
+
+ if (LV)
+ LV->replaceKillInstruction(SavedReg, *KillMI, *MI);
+ }
+
+ // Move instruction to its destination.
+ MBB->remove(MI);
+ MBB->insert(KillPos, MI);
+
+ if (LIS)
+ LIS->handleMove(*MI);
+
+ ++Num3AddrSunk;
+ return true;
+}
+
+/// Return the MachineInstr* if it is the single def of the Reg in current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// MaxLen specifies the maximum length of the copy chain the function
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+ int Maxlen) {
+ unsigned TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
+
+/// Return true if there are no intervening uses between the last instruction
+/// in the MBB that defines the specified register and the two-address
+/// instruction which is being processed. It also returns the last def location
+/// by reference.
+bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+ unsigned &LastDef) {
+ LastDef = 0;
+ unsigned LastUse = Dist;
+ for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB || MI->isDebugValue())
+ continue;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ continue;
+ if (MO.isUse() && DI->second < LastUse)
+ LastUse = DI->second;
+ if (MO.isDef() && DI->second > LastDef)
+ LastDef = DI->second;
+ }
+
+ return !(LastUse > LastDef && LastUse < Dist);
+}
+
+/// Return true if the specified MI is a copy instruction or an insert_subreg
+/// or subreg_to_reg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
+ unsigned &SrcReg, unsigned &DstReg,
+ bool &IsSrcPhys, bool &IsDstPhys) {
+ SrcReg = 0;
+ DstReg = 0;
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
+
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
+}
+
+/// Test if the given register value, which is used by the
+/// given instruction, is killed by the given instruction.
+static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+ LiveIntervals *LIS) {
+ if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LIS->isNotInMIMap(*MI)) {
+ // FIXME: Sometimes tryInstructionTransform() will add instructions and
+ // test whether they can be folded before keeping them. In this case it
+ // sets a kill before recursively calling tryInstructionTransform() again.
+ // If there is no interval available, we assume that this instruction is
+ // one of those. A kill flag is manually inserted on the operand so the
+ // check below will handle it.
+ LiveInterval &LI = LIS->getInterval(Reg);
+ // This is to match the kill flag version where undefs don't have kill
+ // flags.
+ if (!LI.hasAtLeastOneValue())
+ return false;
+
+ SlotIndex useIdx = LIS->getInstructionIndex(*MI);
+ LiveInterval::const_iterator I = LI.find(useIdx);
+ assert(I != LI.end() && "Reg must be live-in to use.");
+ return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+ }
+
+ return MI->killsRegister(Reg);
+}
+
+/// Test if the given register value, which is used by the given
+/// instruction, is killed by the given instruction. This looks through
+/// coalescable copies to see if the original value is potentially not killed.
+///
+/// For example, in this code:
+///
+/// %reg1034 = copy %reg1024
+/// %reg1035 = copy %reg1025<kill>
+/// %reg1036 = add %reg1034<kill>, %reg1035<kill>
+///
+/// %reg1034 is not considered to be killed, since it is copied from a
+/// register which is not killed. Treating it as not killed lets the
+/// normal heuristics commute the (two-address) add, which lets
+/// coalescing eliminate the extra copy.
+///
+/// If allowFalsePositives is true then likely kills are treated as kills even
+/// if it can't be proven that they are kills.
+static bool isKilled(MachineInstr &MI, unsigned Reg,
+ const MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ LiveIntervals *LIS,
+ bool allowFalsePositives) {
+ MachineInstr *DefMI = &MI;
+ for (;;) {
+ // All uses of physical registers are likely to be kills.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ (allowFalsePositives || MRI->hasOneUse(Reg)))
+ return true;
+ if (!isPlainlyKilled(DefMI, Reg, LIS))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return true;
+ MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
+ // If there are multiple defs, we can't do a simple analysis, so just
+ // go with what the kill flag says.
+ if (std::next(Begin) != MRI->def_end())
+ return true;
+ DefMI = Begin->getParent();
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ // If the def is something other than a copy, then it isn't going to
+ // be coalesced, so follow the kill flag.
+ if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return true;
+ Reg = SrcReg;
+ }
+}
+
+/// Return true if the specified MI uses the specified register as a two-address
+/// use. If so, return the destination register by reference.
+static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+ for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ unsigned ti;
+ if (MI.isRegTiedToDefOperand(i, &ti)) {
+ DstReg = MI.getOperand(ti).getReg();
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Given a register, if it has a single in-basic-block use, return the use
+/// instruction if it's a copy or a two-address use.
+static
+MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII,
+ bool &IsCopy,
+ unsigned &DstReg, bool &IsDstPhys) {
+ if (!MRI->hasOneNonDBGUse(Reg))
+ // None or more than one use.
+ return nullptr;
+ MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
+ if (UseMI.getParent() != MBB)
+ return nullptr;
+ unsigned SrcReg;
+ bool IsSrcPhys;
+ if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+ IsCopy = true;
+ return &UseMI;
+ }
+ IsDstPhys = false;
+ if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return &UseMI;
+ }
+ return nullptr;
+}
+
+/// Return the physical register the specified virtual register might be mapped
+/// to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+ while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+ if (SI == RegMap.end())
+ return 0;
+ Reg = SI->second;
+ }
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ return 0;
+}
+
+/// Return true if the two registers are equal or aliased.
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+ if (RegA == RegB)
+ return true;
+ if (!RegA || !RegB)
+ return false;
+ return TRI->regsOverlap(RegA, RegB);
+}
+
+/// Return true if it's potentially profitable to commute the two-address
+/// instruction that's being processed.
+bool
+TwoAddressInstructionPass::
+isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ MachineInstr *MI, unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ // Determine if it's profitable to commute this two address instruction. In
+ // general, we want no uses between this instruction and the definition of
+ // the two-address register.
+ // e.g.
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1028
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // In this case, it might not be possible to coalesce the second MOV8rr
+ // instruction if the first one is coalesced. So it would be profitable to
+ // commute it:
+ // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1
+ // %reg1029<def> = MOV8rr %reg1028
+ // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
+ // insert => %reg1030<def> = MOV8rr %reg1029
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+
+ if (!isPlainlyKilled(MI, regC, LIS))
+ return false;
+
+ // Ok, we have something like:
+ // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead>
+ // let's see if it's worth commuting it.
+
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r0 = MOV %reg1026
+ // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+ unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ if (ToRegA) {
+ unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+ unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
+ bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
+
+    // Commute if any of the following are true:
+ // -RegB is not tied to a register and RegC is compatible with RegA.
+ // -RegB is tied to the wrong physical register, but RegC is.
+ // -RegB is tied to the wrong physical register, and RegC isn't tied.
+ if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC)))
+ return true;
+    // Don't commute if any of the following are true:
+ // -RegC is not tied to a register and RegB is compatible with RegA.
+ // -RegC is tied to the wrong physical register, but RegB is.
+ // -RegC is tied to the wrong physical register, and RegB isn't tied.
+ if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB)))
+ return false;
+ }
+
+ // If there is a use of regC between its last def (could be livein) and this
+ // instruction, then bail.
+ unsigned LastDefC = 0;
+ if (!noUseAfterLastDef(regC, Dist, LastDefC))
+ return false;
+
+ // If there is a use of regB between its last def (could be livein) and this
+ // instruction, then go ahead and make this transformation.
+ unsigned LastDefB = 0;
+ if (!noUseAfterLastDef(regB, Dist, LastDefB))
+ return true;
+
+ // Look for situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+ // FIXME:
+  // We can extend the logic further: If a pair of operands in an insn has
+ // been merged, the insn could be regarded as a virtual copy, and the virtual
+ // copy could also be used to construct a copy chain.
+ // To more generally minimize register copies, ideally the logic of two addr
+ // instruction pass should be integrated with register allocation pass where
+ // interference graph is available.
+ if (isRevCopyChain(regC, regA, 3))
+ return true;
+
+ if (isRevCopyChain(regB, regA, 3))
+ return false;
+
+  // Since there are no intervening uses of either register, commute if the
+  // def of regC is closer: its live interval is shorter.
+ return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// Commute a two-address instruction and update the basic block, distance map,
+/// and live variables if needed. Return true if it is successful.
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+ unsigned RegBIdx,
+ unsigned RegCIdx,
+ unsigned Dist) {
+ unsigned RegC = MI->getOperand(RegCIdx).getReg();
+ DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+ MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);
+
+ if (NewMI == nullptr) {
+ DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+ assert(NewMI == MI &&
+ "TargetInstrInfo::commuteInstruction() should not return a new "
+ "instruction unless it was requested.");
+
+ // Update source register map.
+ unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ if (FromRegC) {
+ unsigned RegA = MI->getOperand(0).getReg();
+ SrcRegMap[RegA] = FromRegC;
+ }
+
+ return true;
+}
+
+/// Return true if it is profitable to convert the given 2-address instruction
+/// to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+ // Look for situations like this:
+ // %reg1024<def> = MOV r1
+ // %reg1025<def> = MOV r0
+ // %reg1026<def> = ADD %reg1024, %reg1025
+ // r2 = MOV %reg1026
+ // Turn ADD into a 3-address instruction to avoid a copy.
+ unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ if (!FromRegB)
+ return false;
+ unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// Convert the specified two-address instruction into a three address one.
+/// Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
+ // FIXME: Why does convertToThreeAddress() need an iterator reference?
+ MachineFunction::iterator MFI = MBB->getIterator();
+ MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
+ assert(MBB->getIterator() == MFI &&
+ "convertToThreeAddress changed iterator reference");
+ if (!NewMI)
+ return false;
+
+ DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+ DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+ bool Sunk = false;
+
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
+
+ if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+    // FIXME: Temporary workaround. If the new instruction doesn't
+    // use RegB, convertToThreeAddress must have created more
+    // than one instruction.
+ Sunk = sink3AddrInstruction(NewMI, RegB, mi);
+
+ MBB->erase(mi); // Nuke the old inst.
+
+ if (!Sunk) {
+ DistanceMap.insert(std::make_pair(NewMI, Dist));
+ mi = NewMI;
+ nmi = std::next(mi);
+ }
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
+ return true;
+}
+
+/// Scan forward through the chain of single uses, updating the maps whenever
+/// the use is a copy or a two-address instruction.
+void
+TwoAddressInstructionPass::scanUses(unsigned DstReg) {
+ SmallVector<unsigned, 4> VirtRegPairs;
+ bool IsDstPhys;
+ bool IsCopy = false;
+ unsigned NewReg = 0;
+ unsigned Reg = DstReg;
+ while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
+ NewReg, IsDstPhys)) {
+ if (IsCopy && !Processed.insert(UseMI).second)
+ break;
+
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+ if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+ break;
+
+ if (IsDstPhys) {
+ VirtRegPairs.push_back(NewReg);
+ break;
+ }
+ bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+ if (!isNew)
+ assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ VirtRegPairs.push_back(NewReg);
+ Reg = NewReg;
+ }
+
+ if (!VirtRegPairs.empty()) {
+ unsigned ToReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ while (!VirtRegPairs.empty()) {
+ unsigned FromReg = VirtRegPairs.back();
+ VirtRegPairs.pop_back();
+ bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+ ToReg = FromReg;
+ }
+ bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+ if (!isNew)
+ assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+ }
+}
+
+/// If the specified instruction is not yet processed, process it if it's a
+/// copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1 = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
+ if (Processed.count(MI))
+ return;
+
+ bool IsSrcPhys, IsDstPhys;
+ unsigned SrcReg, DstReg;
+ if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+ return;
+
+ if (IsDstPhys && !IsSrcPhys)
+ DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+ else if (!IsDstPhys && IsSrcPhys) {
+ bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+ if (!isNew)
+ assert(SrcRegMap[DstReg] == SrcReg &&
+ "Can't map to two src physical registers!");
+
+ scanUses(DstReg);
+ }
+
+ Processed.insert(MI);
+}
+
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the instruction below the kill instruction in order to
+/// eliminate the need for the copy.
+bool TwoAddressInstructionPass::
+rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = nullptr;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+    // Don't move past calls, etc.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, *MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.insert(MOReg);
+ else {
+ Uses.insert(MOReg);
+ if (MOReg != Reg && (MO.isKill() ||
+ (LIS && isPlainlyKilled(MI, MOReg, LIS))))
+ Kills.insert(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator Begin = MI;
+ MachineBasicBlock::iterator AfterMI = std::next(Begin);
+
+ MachineBasicBlock::iterator End = AfterMI;
+ while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
+ Defs.insert(End->getOperand(0).getReg());
+ ++End;
+ }
+
+  // Check that the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) {
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI.isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
+ OtherMI.isBranch() || OtherMI.isTerminator())
+      // Don't move past calls, etc.
+ return false;
+ for (const MachineOperand &MO : OtherMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(MOReg))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && Defs.count(MOReg))
+ // May clobber a physical register def.
+          // FIXME: This may be too conservative. It's ok if the instruction
+          // is sunk completely below the use.
+ return false;
+ } else {
+ if (Defs.count(MOReg))
+ return false;
+ bool isKill =
+ MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
+ if (MOReg != Reg &&
+ ((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (MOReg == Reg && !isKill)
+ // We can't schedule across a use of the register in question.
+ return false;
+        // Ensure that if this is the register in question, it's the kill we
+        // expect.
+ assert((MOReg != Reg || &OtherMI == KillMI) &&
+ "Found multiple kills of a register in a basic block");
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue())
+ --Begin;
+
+ nmi = End;
+ MachineBasicBlock::iterator InsertPos = KillPos;
+ if (LIS) {
+ // We have to move the copies first so that the MBB is still well-formed
+ // when calling handleMove().
+ for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
+ auto CopyMI = MBBI++;
+ MBB->splice(InsertPos, MBB, CopyMI);
+ LIS->handleMove(*CopyMI);
+ InsertPos = CopyMI;
+ }
+ End = std::next(MachineBasicBlock::iterator(MI));
+ }
+
+ // Copies following MI may have been moved as well.
+ MBB->splice(InsertPos, MBB, Begin, End);
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(*MI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, *KillMI);
+ LV->addVirtualRegisterKilled(Reg, *MI);
+ }
+
+ DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI);
+ return true;
+}
+
+/// Return true if the re-scheduling will put the given instruction too close
+/// to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI) {
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike())
+ continue;
+ if (&DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(&DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
+/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
+/// consider moving the kill instruction above the current two-address
+/// instruction in order to eliminate the need for the copy.
+bool TwoAddressInstructionPass::
+rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ // Bail immediately if we don't have LV or LIS available. We use them to find
+ // kills efficiently.
+ if (!LV && !LIS)
+ return false;
+
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = nullptr;
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ assert(LI.end() != LI.begin() &&
+ "Reg should not have empty live interval.");
+
+ SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot();
+ LiveInterval::const_iterator I = LI.find(MBBEndIdx);
+ if (I != LI.end() && I->start < MBBEndIdx)
+ return false;
+
+ --I;
+ KillMI = LIS->getInstructionFromIndex(I->end);
+ } else {
+ KillMI = LV->getVarInfo(Reg).findKill(MBB);
+ }
+ if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies, they may be coalesced later.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(AA, SeenStore))
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ SmallSet<unsigned, 2> LiveDefs;
+ for (const MachineOperand &MO : KillMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI))
+ return false;
+ bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ if (MOReg == Reg && !isKill)
+ return false;
+ Uses.insert(MOReg);
+ if (isKill && MOReg != Reg)
+ Kills.insert(MOReg);
+ } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Defs.insert(MOReg);
+ if (!MO.isDead())
+ LiveDefs.insert(MOReg);
+ }
+ }
+
+  // Check that rescheduling will not break dependencies.
+ unsigned NumVisited = 0;
+ for (MachineInstr &OtherMI :
+ llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) {
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI.isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() ||
+ OtherMI.isBranch() || OtherMI.isTerminator())
+      // Don't move past calls, etc.
+ return false;
+ SmallVector<unsigned, 2> OtherDefs;
+ for (const MachineOperand &MO : OtherMI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (Defs.count(MOReg))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (Kills.count(MOReg))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ if (&OtherMI != MI && MOReg == Reg &&
+ !(MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))))
+ // We can't schedule across a use of the register in question.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ unsigned MOReg = OtherDefs[i];
+ if (Uses.count(MOReg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ LiveDefs.count(MOReg))
+ return false;
+ // Physical register def is seen.
+ Defs.erase(MOReg);
+ }
+ }
+
+  // Move the old kill above MI; don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = std::next(From);
+ while (std::prev(From)->isDebugValue())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = std::prev(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ // Update live variables
+ if (LIS) {
+ LIS->handleMove(*KillMI);
+ } else {
+ LV->removeVirtualRegisterKilled(Reg, *KillMI);
+ LV->addVirtualRegisterKilled(Reg, *MI);
+ }
+
+ DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
+ return true;
+}
+
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the
+/// given machine instruction to improve opportunities for coalescing and
+/// elimination of a register to register copy.
+///
+/// 'DstOpIdx' specifies the index of MI def operand.
+/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx'
+/// operand is killed by the given instruction.
+/// The 'Dist' argument provides the distance of MI from the start of the
+/// current basic block, and it is used to determine whether it is profitable
+/// to commute operands in the instruction.
+///
+/// Returns true if the transformation happened. Otherwise, returns false.
+bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
+ unsigned DstOpIdx,
+ unsigned BaseOpIdx,
+ bool BaseOpKilled,
+ unsigned Dist) {
+ unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
+ unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
+ unsigned OpsNum = MI->getDesc().getNumOperands();
+ unsigned OtherOpIdx = MI->getDesc().getNumDefs();
+ for (; OtherOpIdx < OpsNum; OtherOpIdx++) {
+ // The call of findCommutedOpIndices below only checks if BaseOpIdx
+ // and OtherOpIdx are commutable, it does not really search for
+ // other commutable operands and does not change the values of passed
+ // variables.
+ if (OtherOpIdx == BaseOpIdx ||
+ !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
+ continue;
+
+ unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
+ bool AggressiveCommute = false;
+
+ // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
+ // operands. This makes the live ranges of DstOp and OtherOp joinable.
+ bool DoCommute =
+ !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false);
+
+ if (!DoCommute &&
+ isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) {
+ DoCommute = true;
+ AggressiveCommute = true;
+ }
+
+ // If it's profitable to commute, try to do so.
+ if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) {
+ ++NumCommuted;
+ if (AggressiveCommute)
+ ++NumAggrCommuted;
+ return true;
+ }
+ }
+ return false;
+}
+
+/// For the case where an instruction has a single pair of tied register
+/// operands, attempt some transformations that may either eliminate the tied
+/// operands or improve the opportunities for coalescing away the register copy.
+/// Returns true if no copy needs to be inserted to untie mi's operands
+/// (either because they were untied, or because mi was rescheduled, and will
+/// be visited again later). If the shouldOnlyCommute flag is true, only
+/// instruction commutation is attempted.
+bool TwoAddressInstructionPass::
+tryInstructionTransform(MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned SrcIdx, unsigned DstIdx,
+ unsigned Dist, bool shouldOnlyCommute) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ unsigned regA = MI.getOperand(DstIdx).getReg();
+ unsigned regB = MI.getOperand(SrcIdx).getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ "cannot make instruction into two-address form");
+ bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+
+ if (TargetRegisterInfo::isVirtualRegister(regA))
+ scanUses(regA);
+
+ bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
+
+  // If the instruction is convertible to 3-address form, do not return yet;
+  // instead, try the 3-address transformation aggressively and use this
+  // variable to check later, because the result might be better.
+ // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
+ // instead of the following code.
+ // addl %esi, %edi
+ // movl %edi, %eax
+ // ret
+ if (Commuted && !MI.isConvertibleTo3Addr())
+ return false;
+
+ if (shouldOnlyCommute)
+ return false;
+
+ // If there is one more use of regB later in the same MBB, consider
+  // re-scheduling this MI below it.
+ if (!Commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
+ // If we commuted, regB may have changed so we should re-sample it to avoid
+ // confusing the three address conversion below.
+ if (Commuted) {
+ regB = MI.getOperand(SrcIdx).getReg();
+ regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
+ }
+
+ if (MI.isConvertibleTo3Addr()) {
+ // This instruction is potentially convertible to a true
+ // three-address instruction. Check if it is profitable.
+ if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
+ // Try to convert it.
+ if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) {
+ ++NumConvertedTo3Addr;
+ return true; // Done with this instruction.
+ }
+ }
+ }
+
+  // Return if the instruction was commuted but 3-address conversion failed.
+ if (Commuted)
+ return false;
+
+ // If there is one more use of regB later in the same MBB, consider
+  // re-scheduling it before this MI if it's legal.
+ if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+ if (MI.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
+ const TargetRegisterClass *RC =
+ TRI->getAllocatableClass(
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ MBB->insert(mi, NewMIs[0]);
+ MBB->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformResult =
+ tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true);
+ (void)TransformResult;
+ assert(!TransformResult &&
+ "tryInstructionTransform() should return false.");
+ if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), MI)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, *NewMIs[1]);
+ }
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg())
+ OrigRegs.push_back(MO.getReg());
+ }
+ }
+
+ MI.eraseFromParent();
+
+ // Update LiveIntervals.
+ if (LIS) {
+ MachineBasicBlock::iterator Begin(NewMIs[0]);
+ MachineBasicBlock::iterator End(NewMIs[1]);
+ LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs);
+ }
+
+ mi = NewMIs[1];
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands were found, including the trivial ones.
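+// For example (illustrative), an x86 ADD32rr of the form
+//   %dst = ADD32rr %src1<tied>, %src2
+// ties operand 1 to operand 0, so the pair (1, 0) is recorded for %src1.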
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool AnyOps = false;
+ unsigned NumOps = MI->getNumOperands();
+
+ for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
+ unsigned DstIdx = 0;
+ if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx))
+ continue;
+ AnyOps = true;
+ MachineOperand &SrcMO = MI->getOperand(SrcIdx);
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned SrcReg = SrcMO.getReg();
+ unsigned DstReg = DstMO.getReg();
+ // Tied constraint already satisfied?
+ if (SrcReg == DstReg)
+ continue;
+
+ assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
+
+ // Deal with <undef> uses immediately - simply rewrite the src operand.
+ if (SrcMO.isUndef() && !DstMO.getSubReg()) {
+ // Constrain the DstReg register class if required.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
+ TRI, *MF))
+ MRI->constrainRegClass(DstReg, RC);
+ SrcMO.setReg(DstReg);
+ SrcMO.setSubReg(0);
+ DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
+ continue;
+ }
+ TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx));
+ }
+ return AnyOps;
+}
+
+// Process a list of tied MI operands that all use the same source register.
+// The tied pairs are of the form (SrcIdx, DstIdx).
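+// Schematically (illustrative), a non-trivial pair rewrites
+//   %a = op %b<tied>, ...
+// into
+//   %a = COPY %b
+//   %a = op %a<tied>, ...
+// while transferring kill flags and updating DistanceMap and live ranges.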
+void
+TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
+ TiedPairList &TiedPairs,
+ unsigned &Dist) {
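+  // Note whether any tied def is early-clobber: the value defined by each
+  // inserted copy must then be live up to MI's early-clobber slot rather than
+  // its normal register slot.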
+ bool IsEarlyClobber = false;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second);
+ IsEarlyClobber |= DstMO.isEarlyClobber();
+ }
+
+ bool RemovedKillFlag = false;
+ bool AllUsesCopied = true;
+ unsigned LastCopiedReg = 0;
+ SlotIndex LastCopyIdx;
+ unsigned RegB = 0;
+ unsigned SubRegB = 0;
+ for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
+ unsigned SrcIdx = TiedPairs[tpi].first;
+ unsigned DstIdx = TiedPairs[tpi].second;
+
+ const MachineOperand &DstMO = MI->getOperand(DstIdx);
+ unsigned RegA = DstMO.getReg();
+
+ // Grab RegB from the instruction because it may have changed if the
+ // instruction was commuted.
+ RegB = MI->getOperand(SrcIdx).getReg();
+ SubRegB = MI->getOperand(SrcIdx).getSubReg();
+
+ if (RegA == RegB) {
+ // The register is tied to multiple destinations (or else we would
+ // not have continued this far), but this use of the register
+ // already matches the tied destination. Leave it.
+ AllUsesCopied = false;
+ continue;
+ }
+ LastCopiedReg = RegA;
+
+ assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ "cannot make instruction into two-address form");
+
+#ifndef NDEBUG
+ // First, verify that we don't have a use of "a" in the instruction
+ // (a = b + a for example) because our transformation will not
+ // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i)
+ assert(i == DstIdx ||
+ !MI->getOperand(i).isReg() ||
+ MI->getOperand(i).getReg() != RegA);
+#endif
+
+ // Emit a copy.
+ MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), RegA);
+ // If this operand is folding a truncation, the truncation now moves to the
+ // copy so that the register classes remain valid for the operands.
+ MIB.addReg(RegB, 0, SubRegB);
+ const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+ if (SubRegB) {
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
+ SubRegB) &&
+ "tied subregister must be a truncation");
+ // The superreg class will not be used to constrain the subreg class.
+ RC = nullptr;
+ }
+ else {
+ assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+ && "tied subregister must be a truncation");
+ }
+ }
+
+ // Update DistanceMap.
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ DistanceMap.insert(std::make_pair(&*PrevMI, Dist));
+ DistanceMap[MI] = ++Dist;
+
+ if (LIS) {
+ LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
+
+ if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ LiveInterval &LI = LIS->getInterval(RegA);
+ VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
+ LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ }
+
+ DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
+
+ MachineOperand &MO = MI->getOperand(SrcIdx);
+ assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
+ "inconsistent operand info for 2-reg pass");
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+
+    // Make sure regA is in a legal regclass for the SrcIdx operand.
+ if (TargetRegisterInfo::isVirtualRegister(RegA) &&
+ TargetRegisterInfo::isVirtualRegister(RegB))
+ MRI->constrainRegClass(RegA, RC);
+ MO.setReg(RegA);
+ // The getMatchingSuper asserts guarantee that the register class projected
+ // by SubRegB is compatible with RegA with no subregister. So regardless of
+    // whether the dest operand writes a subreg, the source operand should not.
+ MO.setSubReg(0);
+
+ // Propagate SrcRegMap.
+ SrcRegMap[RegA] = RegB;
+ }
+
+ if (AllUsesCopied) {
+ if (!IsEarlyClobber) {
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+ MO.isUse()) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
+ }
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ }
+ }
+ }
+
+ // Update live variables for regB.
+ if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ MachineBasicBlock::iterator PrevMI = MI;
+ --PrevMI;
+ LV->addVirtualRegisterKilled(RegB, *PrevMI);
+ }
+
+ // Update LiveIntervals.
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(RegB);
+ SlotIndex MIIdx = LIS->getInstructionIndex(*MI);
+ LiveInterval::const_iterator I = LI.find(MIIdx);
+ assert(I != LI.end() && "RegB must be live-in to use.");
+
+ SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
+ if (I->end == UseIdx)
+ LI.removeSegment(LastCopyIdx, UseIdx);
+ }
+
+ } else if (RemovedKillFlag) {
+ // Some tied uses of regB matched their destination registers, so
+ // regB is still used in this instruction, but a kill flag was
+ // removed from a different tied use of regB, so now we need to add
+ // a kill flag to one of the remaining uses of regB.
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ MO.setIsKill(true);
+ break;
+ }
+ }
+ }
+}
+
+/// Reduce two-address instructions to two operands.
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ const TargetMachine &TM = MF->getTarget();
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ InstrItins = MF->getSubtarget().getInstrItineraryData();
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ OptLevel = TM.getOptLevel();
+
+ bool MadeChange = false;
+
+ DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+ DEBUG(dbgs() << "********** Function: "
+ << MF->getName() << '\n');
+
+ // This pass takes the function out of SSA form.
+ MRI->leaveSSA();
+
+ TiedOperandMap TiedOperands;
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ MBB = &*MBBI;
+ unsigned Dist = 0;
+ DistanceMap.clear();
+ SrcRegMap.clear();
+ DstRegMap.clear();
+ Processed.clear();
+ for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
+ mi != me; ) {
+ MachineBasicBlock::iterator nmi = std::next(mi);
+ if (mi->isDebugValue()) {
+ mi = nmi;
+ continue;
+ }
+
+ // Expand REG_SEQUENCE instructions. This will position mi at the first
+ // expanded instruction.
+ if (mi->isRegSequence())
+ eliminateRegSequence(mi);
+
+ DistanceMap.insert(std::make_pair(&*mi, ++Dist));
+
+ processCopy(&*mi);
+
+ // First scan through all the tied register uses in this instruction
+ // and record a list of pairs of tied operands for each register.
+ if (!collectTiedOperands(&*mi, TiedOperands)) {
+ mi = nmi;
+ continue;
+ }
+
+ ++NumTwoAddressInstrs;
+ MadeChange = true;
+ DEBUG(dbgs() << '\t' << *mi);
+
+ // If the instruction has a single pair of tied operands, try some
+ // transformations that may either eliminate the tied operands or
+ // improve the opportunities for coalescing away the register copy.
+ if (TiedOperands.size() == 1) {
+ SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs
+ = TiedOperands.begin()->second;
+ if (TiedPairs.size() == 1) {
+ unsigned SrcIdx = TiedPairs[0].first;
+ unsigned DstIdx = TiedPairs[0].second;
+ unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
+ unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ if (SrcReg != DstReg &&
+ tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
+ // The tied operands have been eliminated or shifted further down
+ // the block to ease elimination. Continue processing with 'nmi'.
+ TiedOperands.clear();
+ mi = nmi;
+ continue;
+ }
+ }
+ }
+
+ // Now iterate over the information collected above.
+ for (auto &TO : TiedOperands) {
+ processTiedPairs(&*mi, TO.second, Dist);
+ DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ }
+
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
+ // Clear TiedOperands here instead of at the top of the loop
+ // since most instructions do not have tied operands.
+ TiedOperands.clear();
+ mi = nmi;
+ }
+ }
+
+ if (LIS)
+ MF->verify(this, "After two-address instruction pass");
+
+ return MadeChange;
+}
+
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
+///
+/// The instruction is turned into a sequence of sub-register copies:
+///
+/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+/// %dst:ssub0<def,undef> = COPY %v1
+/// %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (MI.getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI.getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
+ llvm_unreachable(nullptr);
+ }
+
+ SmallVector<unsigned, 4> OrigRegs;
+ if (LIS) {
+ OrigRegs.push_back(MI.getOperand(0).getReg());
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2)
+ OrigRegs.push_back(MI.getOperand(i).getReg());
+ }
+
+ bool DefEmitted = false;
+ for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
+ MachineOperand &UseMO = MI.getOperand(i);
+ unsigned SrcReg = UseMO.getReg();
+ unsigned SubIdx = MI.getOperand(i+1).getImm();
+ // Nothing needs to be inserted for <undef> operands.
+ if (UseMO.isUndef())
+ continue;
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+    // might insert a COPY that uses SrcReg after it was killed.
+ bool isKill = UseMO.isKill();
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI.getOperand(j).getReg() == SrcReg) {
+ MI.getOperand(j).setIsKill();
+ UseMO.setIsKill(false);
+ isKill = false;
+ break;
+ }
+
+ // Insert the sub-register copy.
+ MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, SubIdx)
+ .addOperand(UseMO);
+
+ // The first def needs an <undef> flag because there is no live register
+ // before it.
+ if (!DefEmitted) {
+ CopyMI->getOperand(0).setIsUndef(true);
+ // Return an iterator pointing to the first inserted instr.
+ MBBI = CopyMI;
+ }
+ DefEmitted = true;
+
+ // Update LiveVariables' kill info.
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
+
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
+ }
+
+ MachineBasicBlock::iterator EndMBBI =
+ std::next(MachineBasicBlock::iterator(MI));
+
+ if (!DefEmitted) {
+ DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
+ MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI.RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << MI);
+ MI.eraseFromParent();
+ }
+
+  // Update LiveIntervals.
+ if (LIS)
+ LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs);
+}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
new file mode 100644
index 000000000000..501e01c45a8b
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -0,0 +1,220 @@
+//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is an extremely simple version of the SimplifyCFG pass. Its sole
+// job is to delete LLVM basic blocks that are not reachable from the entry
+// node. To do this, it performs a simple depth first traversal of the CFG,
+// then deletes any unvisited nodes.
+//
+// Note that this pass is really a hack. In particular, the instruction
+// selectors for various targets should just not generate code for unreachable
+// blocks. Until LLVM has a more systematic way of defining instruction
+// selectors, however, we cannot really expect them to handle additional
+// complexity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+static bool eliminateUnreachableBlock(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Reachable;
+
+ // Mark all reachable blocks.
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<BasicBlock*> DeadBlocks;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+ if (!Reachable.count(&*I)) {
+ BasicBlock *BB = &*I;
+ DeadBlocks.push_back(BB);
+ while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+ PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
+ BB->getInstList().pop_front();
+ }
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+ (*SI)->removePredecessor(BB);
+ BB->dropAllReferences();
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ DeadBlocks[i]->eraseFromParent();
+ }
+
+ return !DeadBlocks.empty();
+}
+
+namespace {
+class UnreachableBlockElimLegacyPass : public FunctionPass {
+ bool runOnFunction(Function &F) override {
+ return eliminateUnreachableBlock(F);
+ }
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableBlockElimLegacyPass() : FunctionPass(ID) {
+ initializeUnreachableBlockElimLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+};
+}
+char UnreachableBlockElimLegacyPass::ID = 0;
+INITIALIZE_PASS(UnreachableBlockElimLegacyPass, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false)
+
+FunctionPass *llvm::createUnreachableBlockEliminationPass() {
+ return new UnreachableBlockElimLegacyPass();
+}
+
+PreservedAnalyses UnreachableBlockElimPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = eliminateUnreachableBlock(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+namespace {
+ class UnreachableMachineBlockElim : public MachineFunctionPass {
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineModuleInfo *MMI;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+ };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
+
+ MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ // Mark all reachable blocks.
+ for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable))
+ (void)BB/* Mark all reachable blocks */;
+
+ // Loop over all dead blocks, remembering them and deleting all instructions
+ // in them.
+ std::vector<MachineBasicBlock*> DeadBlocks;
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = &*I;
+
+ // Test for deadness.
+ if (!Reachable.count(BB)) {
+ DeadBlocks.push_back(BB);
+
+ // Update dominator and loop info.
+ if (MLI) MLI->removeBlock(BB);
+ if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+ while (BB->succ_begin() != BB->succ_end()) {
+ MachineBasicBlock* succ = *BB->succ_begin();
+
+ MachineBasicBlock::iterator start = succ->begin();
+ while (start != succ->end() && start->isPHI()) {
+ for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+ if (start->getOperand(i).isMBB() &&
+ start->getOperand(i).getMBB() == BB) {
+ start->RemoveOperand(i);
+ start->RemoveOperand(i-1);
+ }
+
+ start++;
+ }
+
+ BB->removeSuccessor(BB->succ_begin());
+ }
+ }
+ }
+
+ // Actually remove the blocks now.
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ DeadBlocks[i]->eraseFromParent();
+
+ // Cleanup PHI nodes.
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ MachineBasicBlock *BB = &*I;
+ // Prune unneeded PHI entries.
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(),
+ BB->pred_end());
+ MachineBasicBlock::iterator phi = BB->begin();
+ while (phi != BB->end() && phi->isPHI()) {
+ for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
+ if (!preds.count(phi->getOperand(i).getMBB())) {
+ phi->RemoveOperand(i);
+ phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
+ }
+
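+      // A PHI reduced to a single (value, block) pair is just a copy; replace
+      // the output register with its single input and delete the PHI.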
+ if (phi->getNumOperands() == 3) {
+ unsigned Input = phi->getOperand(1).getReg();
+ unsigned Output = phi->getOperand(0).getReg();
+
+ phi++->eraseFromParent();
+ ModifiedPHI = true;
+
+ if (Input != Output) {
+ MachineRegisterInfo &MRI = F.getRegInfo();
+ MRI.constrainRegClass(Input, MRI.getRegClass(Output));
+ MRI.replaceRegWith(Output, Input);
+ }
+
+ continue;
+ }
+
+ ++phi;
+ }
+ }
+
+ F.RenumberBlocks();
+
+ return (!DeadBlocks.empty() || ModifiedPHI);
+}
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
new file mode 100644
index 000000000000..8a3a0328870d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -0,0 +1,467 @@
+//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the VirtRegMap class.
+//
+// It also contains implementations of the Spiller interface, which, given a
+// virtual register map and a machine function, eliminates all virtual
+// references by replacing them with physical register references - adding spill
+// code as necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "LiveDebugVariables.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+STATISTIC(NumSpillSlots, "Number of spill slots allocated");
+STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
+
+//===----------------------------------------------------------------------===//
+// VirtRegMap implementation
+//===----------------------------------------------------------------------===//
+
+char VirtRegMap::ID = 0;
+
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
+
+bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
+ TII = mf.getSubtarget().getInstrInfo();
+ TRI = mf.getSubtarget().getRegisterInfo();
+ MF = &mf;
+
+ Virt2PhysMap.clear();
+ Virt2StackSlotMap.clear();
+ Virt2SplitMap.clear();
+
+ grow();
+ return false;
+}
+
+void VirtRegMap::grow() {
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ ++NumSpillSlots;
+ return SS;
+}
+
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+ unsigned Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = getPhys(Hint);
+ return getPhys(VirtReg) == Hint;
+}
+
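+// Returns true if VirtReg's allocation hint resolves to a concrete register:
+// either a physical register hint, or a virtual register hint that already
+// has a physical assignment.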
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ return hasPhys(Hint.second);
+ return false;
+}
+
+int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+}
+
+void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
+ assert(TargetRegisterInfo::isVirtualRegister(virtReg));
+ assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+ "attempt to assign stack slot to already spilled register");
+ assert((SS >= 0 ||
+ (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ "illegal fixed frame index");
+ Virt2StackSlotMap[virtReg] = SS;
+}
+
+void VirtRegMap::print(raw_ostream &OS, const Module*) const {
+ OS << "********** REGISTER MAP **********\n";
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
+ }
+ }
+
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
+ }
+ }
+ OS << '\n';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void VirtRegMap::dump() const {
+ print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// VirtRegRewriter
+//===----------------------------------------------------------------------===//
+//
+// The VirtRegRewriter is the last of the register allocator passes.
+// It rewrites virtual registers to physical registers as specified in the
+// VirtRegMap analysis. It also updates live-in information on basic blocks
+// according to LiveIntervals.
+//
+namespace {
+class VirtRegRewriter : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ SlotIndexes *Indexes;
+ LiveIntervals *LIS;
+ VirtRegMap *VRM;
+
+ void rewrite();
+ void addMBBLiveIns();
+ bool readsUndefSubreg(const MachineOperand &MO) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+ void handleIdentityCopy(MachineInstr &MI) const;
+
+public:
+ static char ID;
+ VirtRegRewriter() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction&) override;
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
+};
+} // end anonymous namespace
+
+char &llvm::VirtRegRewriterID = VirtRegRewriter::ID;
+
+INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
+ "Virtual Register Rewriter", false, false)
+
+char VirtRegRewriter::ID = 0;
+
+void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<VirtRegMap>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+ MF = &fn;
+ TM = &MF->getTarget();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ MRI = &MF->getRegInfo();
+ Indexes = &getAnalysis<SlotIndexes>();
+ LIS = &getAnalysis<LiveIntervals>();
+ VRM = &getAnalysis<VirtRegMap>();
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getName() << '\n');
+ DEBUG(VRM->dump());
+
+ // Add kill flags while we still have virtual registers.
+ LIS->addKillFlags(VRM);
+
+ // Live-in lists on basic blocks are required for physregs.
+ addMBBLiveIns();
+
+ // Rewrite virtual registers.
+ rewrite();
+
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
+
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
+ return true;
+}
+
+void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
+ unsigned PhysReg) const {
+ assert(!LI.empty());
+ assert(LI.hasSubRanges());
+
+ typedef std::pair<const LiveInterval::SubRange *,
+ LiveInterval::const_iterator> SubRangeIteratorPair;
+ SmallVector<SubRangeIteratorPair, 4> SubRanges;
+ SlotIndex First;
+ SlotIndex Last;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ SubRanges.push_back(std::make_pair(&SR, SR.begin()));
+ if (!First.isValid() || SR.segments.front().start < First)
+ First = SR.segments.front().start;
+ if (!Last.isValid() || SR.segments.back().end > Last)
+ Last = SR.segments.back().end;
+ }
+
+  // Check all MBB start positions between First and Last while
+  // simultaneously advancing an iterator for each subrange.
+ for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First);
+ MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) {
+ SlotIndex MBBBegin = MBBI->first;
+ // Advance all subrange iterators so that their end position is just
+ // behind MBBBegin (or the iterator is at the end).
+ LaneBitmask LaneMask = 0;
+ for (auto &RangeIterPair : SubRanges) {
+ const LiveInterval::SubRange *SR = RangeIterPair.first;
+ LiveInterval::const_iterator &SRI = RangeIterPair.second;
+ while (SRI != SR->end() && SRI->end <= MBBBegin)
+ ++SRI;
+ if (SRI == SR->end())
+ continue;
+ if (SRI->start <= MBBBegin)
+ LaneMask |= SR->LaneMask;
+ }
+ if (LaneMask == 0)
+ continue;
+ MachineBasicBlock *MBB = MBBI->second;
+ MBB->addLiveIn(PhysReg, LaneMask);
+ }
+}
+
+// Compute MBB live-in lists from virtual register live ranges and their
+// assignments.
+void VirtRegRewriter::addMBBLiveIns() {
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ if (MRI->reg_nodbg_empty(VirtReg))
+ continue;
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ if (LI.empty() || LIS->intervalIsInOneMBB(LI))
+ continue;
+ // This is a virtual register that is live across basic blocks. Its
+ // assigned PhysReg must be marked as live-in to those blocks.
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
+
+ if (LI.hasSubRanges()) {
+ addLiveInsForSubRanges(LI, PhysReg);
+ } else {
+ // Go over MBB begin positions and see if we have segments covering them.
+ // The following works because segments and the MBBIndex list are both
+ // sorted by slot indexes.
+ SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin();
+ for (const auto &Seg : LI) {
+ I = Indexes->advanceMBBIndex(I, Seg.start);
+ for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) {
+ MachineBasicBlock *MBB = I->second;
+ MBB->addLiveIn(PhysReg);
+ }
+ }
+ }
+ }
+
+ // Sort and unique MBB LiveIns as we've not checked if SubReg/PhysReg were in
+ // each MBB's LiveIns set before calling addLiveIn on them.
+ for (MachineBasicBlock &MBB : *MF)
+ MBB.sortUniqueLiveIns();
+}
+
+/// Returns true if the given machine operand \p MO only reads undefined lanes.
+/// The function only works for use operands with a subregister set.
+bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
+ // Shortcut if the operand is already marked undef.
+ if (MO.isUndef())
+ return true;
+
+ unsigned Reg = MO.getReg();
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ const MachineInstr &MI = *MO.getParent();
+ SlotIndex BaseIndex = LIS->getInstructionIndex(MI);
+ // This code is only meant to handle reading undefined subregisters which
+ // we couldn't properly detect before.
+ assert(LI.liveAt(BaseIndex) &&
+ "Reads of completely dead register should be marked undef already");
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+  // See if any of the relevant subregister live ranges is defined at this point.
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex))
+ return false;
+ }
+ return true;
+}
+
+void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
+ if (!MI.isIdentityCopy())
+ return;
+ DEBUG(dbgs() << "Identity copy: " << MI);
+ ++NumIdCopies;
+
+ // Copies like:
+ // %R0 = COPY %R0<undef>
+ // %AL = COPY %AL, %EAX<imp-def>
+ // give us additional liveness information: The target (super-)register
+ // must not be valid before this point. Replace the COPY with a KILL
+ // instruction to maintain this information.
+ if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+ MI.setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << " replace by: " << MI);
+ return;
+ }
+
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ DEBUG(dbgs() << " deleted.\n");
+}
+
+void VirtRegRewriter::rewrite() {
+ bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
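+  // Physical super-registers for which implicit kill/dead/def operands must be
+  // added once all operands of the current instruction have been rewritten.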
+ SmallVector<unsigned, 8> SuperDeads;
+ SmallVector<unsigned, 8> SuperDefs;
+ SmallVector<unsigned, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::instr_iterator
+ MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "Instruction uses unmapped VirtReg");
+ assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
+
+ // Preserve semantics of sub-register operands.
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg != 0) {
+ if (NoSubRegLiveness) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register. A
+ // partial redef always kills and redefines the super-register.
+ if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ SuperKills.push_back(PhysReg);
+
+ if (MO.isDef()) {
+ // Also add implicit defs for the super-register.
+ if (MO.isDead())
+ SuperDeads.push_back(PhysReg);
+ else
+ SuperDefs.push_back(PhysReg);
+ }
+ } else {
+ if (MO.isUse()) {
+ if (readsUndefSubreg(MO))
+ // We need to add an <undef> flag if the subregister is
+ // completely undefined (and we are not adding super-register
+ // defs).
+ MO.setIsUndef(true);
+ } else if (!MO.isDead()) {
+ assert(MO.isDef());
+ }
+ }
+
+ // The <def,undef> flag only makes sense for sub-register defs, and
+ // we are substituting a full physreg. An <imp-use,kill> operand
+ // from the SuperKills list will represent the partial read of the
+ // super-register.
+ if (MO.isDef())
+ MO.setIsUndef(false);
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, SubReg);
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ while (!SuperDeads.empty())
+ MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+
+ while (!SuperDefs.empty())
+ MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // We can remove identity copies right now.
+ handleIdentityCopy(*MI);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
new file mode 100644
index 000000000000..041fb7b912bf
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -0,0 +1,1235 @@
+//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers LLVM IR exception handling into something closer to what the
+// backend wants for functions using a personality function from a runtime
+// provided by MSVC. Functions with other personality functions are left alone
+// and may be prepared by other passes. In particular, all supported MSVC
+// personality functions require cleanup code to be outlined, and the C++
+// personality requires catch handler code to be outlined.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "winehprepare"
+
+static cl::opt<bool> DisableDemotion(
+ "disable-demotion", cl::Hidden,
+ cl::desc(
+ "Clone multicolor basic blocks but do not demote cross funclet values"),
+ cl::init(false));
+
+static cl::opt<bool> DisableCleanups(
+ "disable-cleanups", cl::Hidden,
+ cl::desc("Do not remove implausible terminators or other similar cleanups"),
+ cl::init(false));
+
+namespace {
+
+class WinEHPrepare : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &Fn) override;
+
+ bool doFinalization(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override {
+ return "Windows exception handling preparation";
+ }
+
+private:
+ void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot);
+ void
+ insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist);
+ AllocaInst *insertPHILoads(PHINode *PN, Function &F);
+ void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads, Function &F);
+ bool prepareExplicitEH(Function &F);
+ void colorFunclets(Function &F);
+
+ void demotePHIsOnFunclets(Function &F);
+ void cloneCommonBlocks(Function &F);
+ void removeImplausibleInstructions(Function &F);
+ void cleanupPreparedFunclets(Function &F);
+ void verifyPreparedFunclets(Function &F);
+
+ // All fields are reset by runOnFunction.
+ EHPersonality Personality = EHPersonality::Unknown;
+
+ DenseMap<BasicBlock *, ColorVector> BlockColors;
+ MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
+};
+
+} // end anonymous namespace
+
+char WinEHPrepare::ID = 0;
+INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions",
+ false, false)
+
+FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
+ return new WinEHPrepare(TM);
+}
+
+bool WinEHPrepare::runOnFunction(Function &Fn) {
+ if (!Fn.hasPersonalityFn())
+ return false;
+
+ // Classify the personality to see what kind of preparation we need.
+ Personality = classifyEHPersonality(Fn.getPersonalityFn());
+
+ // Do nothing if this is not a funclet-based personality.
+ if (!isFuncletEHPersonality(Personality))
+ return false;
+
+ return prepareExplicitEH(Fn);
+}
+
+bool WinEHPrepare::doFinalization(Module &M) { return false; }
+
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
+
+static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
+ const BasicBlock *BB) {
+ CxxUnwindMapEntry UME;
+ UME.ToState = ToState;
+ UME.Cleanup = BB;
+ FuncInfo.CxxUnwindMap.push_back(UME);
+ return FuncInfo.getLastStateNumber();
+}
+
+static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
+ int TryHigh, int CatchHigh,
+ ArrayRef<const CatchPadInst *> Handlers) {
+ WinEHTryBlockMapEntry TBME;
+ TBME.TryLow = TryLow;
+ TBME.TryHigh = TryHigh;
+ TBME.CatchHigh = CatchHigh;
+ assert(TBME.TryLow <= TBME.TryHigh);
+ for (const CatchPadInst *CPI : Handlers) {
+ WinEHHandlerType HT;
+ Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0));
+ if (TypeInfo->isNullValue())
+ HT.TypeDescriptor = nullptr;
+ else
+ HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
+ HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
+ HT.Handler = CPI->getParent();
+ if (auto *AI =
+ dyn_cast<AllocaInst>(CPI->getArgOperand(2)->stripPointerCasts()))
+ HT.CatchObj.Alloca = AI;
+ else
+ HT.CatchObj.Alloca = nullptr;
+ TBME.HandlerArray.push_back(HT);
+ }
+ FuncInfo.TryBlockMap.push_back(TBME);
+}
+
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) {
+ for (const User *U : CleanupPad->users())
+ if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+ return CRI->getUnwindDest();
+ return nullptr;
+}
+
+static void calculateStateNumbersForInvokes(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ auto *F = const_cast<Function *>(Fn);
+ DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F);
+ for (BasicBlock &BB : *F) {
+ auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+
+ auto &BBColors = BlockColors[&BB];
+ assert(BBColors.size() == 1 && "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+
+ BasicBlock *FuncletUnwindDest;
+ auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
+ assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock());
+ if (!FuncletPad)
+ FuncletUnwindDest = nullptr;
+ else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+ FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest();
+ else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad))
+ FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad);
+ else
+ llvm_unreachable("unexpected funclet pad!");
+
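+    // If the invoke unwinds to the same destination as its enclosing funclet,
+    // reuse the funclet's base state (when one has been recorded) instead of
+    // the state of the EH pad it unwinds to.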
+ BasicBlock *InvokeUnwindDest = II->getUnwindDest();
+ int BaseState = -1;
+ if (FuncletUnwindDest == InvokeUnwindDest) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
+ }
+
+ if (BaseState != -1) {
+ FuncInfo.InvokeStateMap[II] = BaseState;
+ } else {
+ Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI();
+ assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!");
+ FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst];
+ }
+ }
+}
+
+// Given BB which ends in an unwind edge, return the EHPad that this BB belongs
+// to. If the unwind edge came from an invoke, return null.
+static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
+ Value *ParentPad) {
+ const TerminatorInst *TI = BB->getTerminator();
+ if (isa<InvokeInst>(TI))
+ return nullptr;
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+ if (CatchSwitch->getParentPad() != ParentPad)
+ return nullptr;
+ return BB;
+ }
+ assert(!TI->isEHPad() && "unexpected EHPad!");
+ auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad();
+ if (CleanupPad->getParentPad() != ParentPad)
+ return nullptr;
+ return CleanupPad->getParent();
+}
+
+static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "not a funclet!");
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+           "shouldn't revisit catch funclets!");
+
+ SmallVector<const CatchPadInst *, 2> Handlers;
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+ Handlers.push_back(CatchPad);
+ }
+ int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryLow;
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryLow);
+ int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+
+ // catchpads are separate funclets in C++ EH due to the way rethrow works.
+ int TryHigh = CatchLow - 1;
+ for (const auto *CatchPad : Handlers) {
+ FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
+ BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest();
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ }
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
+ BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
+ // If a nested cleanup pad reports a null unwind destination and the
+ // enclosing catch pad doesn't, it must be post-dominated by an
+ // unreachable instruction.
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+ }
+ }
+ }
+ int CatchHigh = FuncInfo.getLastStateNumber();
+ addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+ DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+ DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n');
+ DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+ << '\n');
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
+
+ int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB)) {
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CleanupPad->getParentPad()))) {
+ calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
+ }
+ }
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the MSVC++ personality cannot "
+ "contain exceptional actions");
+ }
+ }
+}
+
+static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState,
+ const Function *Filter, const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = false;
+ Entry.Filter = Filter;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
+static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
+ const BasicBlock *Handler) {
+ SEHUnwindMapEntry Entry;
+ Entry.ToState = ParentState;
+ Entry.IsFinally = true;
+ Entry.Filter = nullptr;
+ Entry.Handler = Handler;
+ FuncInfo.SEHUnwindMap.push_back(Entry);
+ return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
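+// Assign __C_specific_handler state numbers, mirroring the C++ walk above:
+// each __try/__except catchswitch becomes a filter state and each cleanuppad
+// becomes a __finally state, with pads that unwind to this one visited
+// recursively.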
+static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
+ const Instruction *FirstNonPHI,
+ int ParentState) {
+ const BasicBlock *BB = FirstNonPHI->getParent();
+ assert(BB->isEHPad() && "not a funclet!");
+
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+ assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+ "shouldn't revisit catch funclets!");
+
+ // Extract the filter function and the __except basic block and create a
+ // state for them.
+ assert(CatchSwitch->getNumHandlers() == 1 &&
+ "SEH doesn't have multiple handlers per __try");
+ const auto *CatchPad =
+ cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI());
+ const BasicBlock *CatchPadBB = CatchPad->getParent();
+ const Constant *FilterOrNull =
+ cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ assert((Filter || FilterOrNull->isNullValue()) &&
+ "unexpected filter value");
+ int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB);
+
+ // Everything in the __try block uses TryState as its parent state.
+ FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+ DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+ << CatchPadBB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+ CatchSwitch->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ TryState);
+
+ // Everything in the __except block unwinds to ParentState, just like code
+ // outside the __try.
+ for (const User *U : CatchPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
+ BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest();
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ }
+ if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) {
+ BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad);
+ // If a nested cleanup pad reports a null unwind destination and the
+ // enclosing catch pad doesn't, it must be post-dominated by an
+ // unreachable instruction.
+ if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest())
+ calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+ }
+ }
+ } else {
+ auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+ // It's possible for a cleanup to be visited twice: it might have multiple
+ // cleanupret instructions.
+ if (FuncInfo.EHPadStateMap.count(CleanupPad))
+ return;
+
+ int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
+ FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+ DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+ << BB->getName() << '\n');
+ for (const BasicBlock *PredBlock : predecessors(BB))
+ if ((PredBlock =
+ getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
+ calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+ CleanupState);
+ for (const User *U : CleanupPad->users()) {
+ const auto *UserI = cast<Instruction>(U);
+ if (UserI->isEHPad())
+ report_fatal_error("Cleanup funclets for the SEH personality cannot "
+ "contain exceptional actions");
+ }
+ }
+}
+
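+// A pad is top level if its parent is the function itself (a 'none' token) and
+// it unwinds to the caller; these pads are the roots from which state
+// numbering starts.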
+static bool isTopLevelPadForMSVC(const Instruction *EHPad) {
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad))
+ return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) &&
+ CatchSwitch->unwindsToCaller();
+ if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad))
+ return isa<ConstantTokenNone>(CleanupPad->getParentPad()) &&
+ getCleanupRetUnwindDest(CleanupPad) == nullptr;
+ if (isa<CatchPadInst>(EHPad))
+ return false;
+ llvm_unreachable("unexpected EHPad!");
+}
+
+void llvm::calculateSEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Don't compute state numbers twice.
+ if (!FuncInfo.SEHUnwindMap.empty())
+ return;
+
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
+ continue;
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
+ continue;
+ ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1);
+ }
+
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
+
+ for (const BasicBlock &BB : *Fn) {
+ if (!BB.isEHPad())
+ continue;
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ if (!isTopLevelPadForMSVC(FirstNonPHI))
+ continue;
+ calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1);
+ }
+
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
+ int TryParentState, ClrHandlerType HandlerType,
+ uint32_t TypeToken, const BasicBlock *Handler) {
+ ClrEHUnwindMapEntry Entry;
+ Entry.HandlerParentState = HandlerParentState;
+ Entry.TryParentState = TryParentState;
+ Entry.Handler = Handler;
+ Entry.HandlerType = HandlerType;
+ Entry.TypeToken = TypeToken;
+ FuncInfo.ClrEHUnwindMap.push_back(Entry);
+ return FuncInfo.ClrEHUnwindMap.size() - 1;
+}
+
+void llvm::calculateClrEHStateNumbers(const Function *Fn,
+ WinEHFuncInfo &FuncInfo) {
+ // Return if it's already been done.
+ if (!FuncInfo.EHPadStateMap.empty())
+ return;
+
+ // This numbering assigns one state number to each catchpad and cleanuppad.
+ // It also computes two tree-like relations over states:
+ // 1) Each state has a "HandlerParentState", which is the state of the next
+ // outer handler enclosing this state's handler (same as nearest ancestor
+ // per the ParentPad linkage on EH pads, but skipping over catchswitches).
+ // 2) Each state has a "TryParentState", which:
+ // a) for a catchpad that's not the last handler on its catchswitch, is
+ // the state of the next catchpad on that catchswitch
+ // b) for all other pads, is the state of the pad whose try region is the
+ //        next outer try region enclosing this state's try region. The "try
+ //        regions" are not present as such in the IR, but will be inferred
+ //        based on the placement of invokes and pads that reach each other
+ //        by exceptional exits.
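+ //     For example, with "try { ... } catch (A) {} catch (B) {}", the
+ //     catchpad for A (not the last handler) gets the state of the catchpad
+ //     for B as its TryParentState per (a), while the catchpad for B gets
+ //     the state of whatever pad encloses the whole try, or -1 if none,
+ //     per (b).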
+ // Catchswitches do not get their own states, but each gets mapped to the
+ // state of its first catchpad.
+
+ // Step one: walk down from outermost to innermost funclets, assigning each
+ // catchpad and cleanuppad a state number. Add an entry to the
+ // ClrEHUnwindMap for each state, recording its HandlerParentState and
+ // handler attributes. Record the TryParentState as well for each catchpad
+ // that's not the last on its catchswitch, but initialize all other entries'
+ // TryParentStates to a sentinel -1 value that the next pass will update.
+
+ // Seed a worklist with pads that have no parent.
+ SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
+ for (const BasicBlock &BB : *Fn) {
+ const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+ const Value *ParentPad;
+ if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI))
+ ParentPad = CPI->getParentPad();
+ else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI))
+ ParentPad = CSI->getParentPad();
+ else
+ continue;
+ if (isa<ConstantTokenNone>(ParentPad))
+ Worklist.emplace_back(FirstNonPHI, -1);
+ }
+
+ // Use the worklist to visit all pads, from outer to inner. Record
+ // HandlerParentState for all pads. Record TryParentState only for catchpads
+ // that aren't the last on their catchswitch (setting all other entries'
+ // TryParentStates to an initial value of -1). This loop is also responsible
+ // for setting the EHPadStateMap entry for all catchpads, cleanuppads, and
+ // catchswitches.
+ while (!Worklist.empty()) {
+ const Instruction *Pad;
+ int HandlerParentState;
+ std::tie(Pad, HandlerParentState) = Worklist.pop_back_val();
+
+ if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+ // Create the entry for this cleanup with the appropriate handler
+ // properties. Finally and fault handlers are distinguished by arity.
+ ClrHandlerType HandlerType =
+ (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
+ : ClrHandlerType::Finally);
+ int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1,
+ HandlerType, 0, Pad->getParent());
+ // Queue any child EH pads on the worklist.
+ for (const User *U : Cleanup->users())
+ if (const auto *I = dyn_cast<Instruction>(U))
+ if (I->isEHPad())
+ Worklist.emplace_back(I, CleanupState);
+ // Remember this pad's state.
+ FuncInfo.EHPadStateMap[Cleanup] = CleanupState;
+ } else {
+ // Walk the handlers of this catchswitch in reverse order since all but
+ // the last need to set the following one as its TryParentState.
+ const auto *CatchSwitch = cast<CatchSwitchInst>(Pad);
+ int CatchState = -1, FollowerState = -1;
+ SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers());
+ for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend();
+ CBI != CBE; ++CBI, FollowerState = CatchState) {
+ const BasicBlock *CatchBlock = *CBI;
+ // Create the entry for this catch with the appropriate handler
+ // properties.
+ const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI());
+ uint32_t TypeToken = static_cast<uint32_t>(
+ cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
+ CatchState =
+ addClrEHHandler(FuncInfo, HandlerParentState, FollowerState,
+ ClrHandlerType::Catch, TypeToken, CatchBlock);
+ // Queue any child EH pads on the worklist.
+ for (const User *U : Catch->users())
+ if (const auto *I = dyn_cast<Instruction>(U))
+ if (I->isEHPad())
+ Worklist.emplace_back(I, CatchState);
+ // Remember this catch's state.
+ FuncInfo.EHPadStateMap[Catch] = CatchState;
+ }
+ // Associate the catchswitch with the state of its first catch.
+ assert(CatchSwitch->getNumHandlers());
+ FuncInfo.EHPadStateMap[CatchSwitch] = CatchState;
+ }
+ }
+
+ // Step two: record the TryParentState of each state. For cleanuppads that
+ // don't have cleanuprets, we may need to infer this from their child pads,
+ // so visit pads in descendant-most to ancestor-most order.
+ for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(),
+ End = FuncInfo.ClrEHUnwindMap.rend();
+ Entry != End; ++Entry) {
+ const Instruction *Pad =
+ Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI();
+ // For most pads, the TryParentState is the state associated with the
+ // unwind dest of exceptional exits from it.
+ const BasicBlock *UnwindDest;
+ if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) {
+ // If a catch is not the last in its catchswitch, its TryParentState is
+ // the state associated with the next catch in the switch, even though
+ // that's not the unwind dest of exceptions escaping the catch. Those
+ // cases were already assigned a TryParentState in the first pass, so
+ // skip them.
+ if (Entry->TryParentState != -1)
+ continue;
+ // Otherwise, get the unwind dest from the catchswitch.
+ UnwindDest = Catch->getCatchSwitch()->getUnwindDest();
+ } else {
+ const auto *Cleanup = cast<CleanupPadInst>(Pad);
+ UnwindDest = nullptr;
+ for (const User *U : Cleanup->users()) {
+ if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
+ // Common and unambiguous case -- cleanupret indicates cleanup's
+ // unwind dest.
+ UnwindDest = CleanupRet->getUnwindDest();
+ break;
+ }
+
+ // Get an unwind dest for the user
+ const BasicBlock *UserUnwindDest = nullptr;
+ if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
+ UserUnwindDest = Invoke->getUnwindDest();
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) {
+ UserUnwindDest = CatchSwitch->getUnwindDest();
+ } else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) {
+ int UserState = FuncInfo.EHPadStateMap[ChildCleanup];
+ int UserUnwindState =
+ FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
+ if (UserUnwindState != -1)
+ UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
+ .Handler.get<const BasicBlock *>();
+ }
+
+ // Not having an unwind dest for this user might indicate that it
+ // doesn't unwind, so can't be taken as proof that the cleanup itself
+ // may unwind to caller (see e.g. SimplifyUnreachable and
+ // RemoveUnwindEdge).
+ if (!UserUnwindDest)
+ continue;
+
+ // Now we have an unwind dest for the user, but we need to see if it
+ // unwinds all the way out of the cleanup or if it stays within it.
+ const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI();
+ const Value *UserUnwindParent;
+ if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad))
+ UserUnwindParent = CSI->getParentPad();
+ else
+ UserUnwindParent =
+ cast<CleanupPadInst>(UserUnwindPad)->getParentPad();
+
+ // The unwind stays within the cleanup iff it targets a child of the
+ // cleanup.
+ if (UserUnwindParent == Cleanup)
+ continue;
+
+ // This unwind exits the cleanup, so its dest is the cleanup's dest.
+ UnwindDest = UserUnwindDest;
+ break;
+ }
+ }
+
+ // Record the state of the unwind dest as the TryParentState.
+ int UnwindDestState;
+
+ // If UnwindDest is null at this point, either the pad in question can
+ // be exited by unwind to caller, or it cannot be exited by unwind. In
+ // either case, reporting such cases as unwinding to caller is correct.
+ // This can lead to EH tables that "look strange" -- if this pad is in
+ // a parent funclet which has other children that do unwind to an enclosing
+ // pad, the try region for this pad will be missing the "duplicate" EH
+ // clause entries that you'd expect to see covering the whole parent. That
+ // should be benign, since the unwind never actually happens. If it were
+ // an issue, we could add a subsequent pass that pushes unwind dests down
+ // from parents that have them to children that appear to unwind to caller.
+ if (!UnwindDest) {
+ UnwindDestState = -1;
+ } else {
+ UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()];
+ }
+
+ Entry->TryParentState = UnwindDestState;
+ }
+
+ // Step three: transfer information from pads to invokes.
+ calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void WinEHPrepare::colorFunclets(Function &F) {
+ BlockColors = colorEHFunclets(F);
+
+ // Invert the map from BB to colors to color to BBs.
+ for (BasicBlock &BB : F) {
+ ColorVector &Colors = BlockColors[&BB];
+ for (BasicBlock *Color : Colors)
+ FuncletBlocks[Color].push_back(&BB);
+ }
+}
+
+void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
+ // Strip PHI nodes off of EH pads.
+ SmallVector<PHINode *, 16> PHINodes;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = &*FI++;
+ if (!BB->isEHPad())
+ continue;
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+ Instruction *I = &*BI++;
+ auto *PN = dyn_cast<PHINode>(I);
+ // Stop at the first non-PHI.
+ if (!PN)
+ break;
+
+ AllocaInst *SpillSlot = insertPHILoads(PN, F);
+ if (SpillSlot)
+ insertPHIStores(PN, SpillSlot);
+
+ PHINodes.push_back(PN);
+ }
+ }
+
+ for (auto *PN : PHINodes) {
+ // There may be lingering uses on other EH PHIs being removed
+ PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+ PN->eraseFromParent();
+ }
+}
+
+void WinEHPrepare::cloneCommonBlocks(Function &F) {
+ // We need to clone all blocks that belong to multiple funclets. Values are
+ // remapped throughout the funclet to propagate both the new instructions
+ // *and* the new basic blocks themselves.
+ for (auto &Funclets : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclets.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+ Value *FuncletToken;
+ if (FuncletPadBB == &F.getEntryBlock())
+ FuncletToken = ConstantTokenNone::get(F.getContext());
+ else
+ FuncletToken = FuncletPadBB->getFirstNonPHI();
+
+ std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
+ ValueToValueMapTy VMap;
+ for (BasicBlock *BB : BlocksInFunclet) {
+ ColorVector &ColorsForBB = BlockColors[BB];
+ // We don't need to do anything if the block is monochromatic.
+ size_t NumColorsForBB = ColorsForBB.size();
+ if (NumColorsForBB == 1)
+ continue;
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Cloning block \'" << BB->getName()
+ << "\' for funclet \'" << FuncletPadBB->getName()
+ << "\'.\n");
+
+ // Create a new basic block and copy instructions into it!
+ BasicBlock *CBB =
+ CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
+ // Insert the clone immediately after the original to ensure determinism
+ // and to keep the same relative ordering of any funclet's blocks.
+ CBB->insertInto(&F, BB->getNextNode());
+
+ // Add basic block mapping.
+ VMap[BB] = CBB;
+
+ // Record delta operations that we need to perform to our color mappings.
+ Orig2Clone.emplace_back(BB, CBB);
+ }
+
+ // If nothing was cloned, we're done cloning in this funclet.
+ if (Orig2Clone.empty())
+ continue;
+
+ // Update our color mappings to reflect that one block has lost a color and
+ // another has gained a color.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ BlocksInFunclet.push_back(NewBlock);
+ ColorVector &NewColors = BlockColors[NewBlock];
+ assert(NewColors.empty() && "A new block should only have one color!");
+ NewColors.push_back(FuncletPadBB);
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Assigned color \'" << FuncletPadBB->getName()
+ << "\' to block \'" << NewBlock->getName()
+ << "\'.\n");
+
+ BlocksInFunclet.erase(
+ std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
+ BlocksInFunclet.end());
+ ColorVector &OldColors = BlockColors[OldBlock];
+ OldColors.erase(
+ std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
+ OldColors.end());
+
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << " Removed color \'" << FuncletPadBB->getName()
+ << "\' from block \'" << OldBlock->getName()
+ << "\'.\n");
+ }
+
+ // Loop over all of the instructions in this funclet, fixing up operand
+ // references as we go. This uses VMap to do all the hard work.
+ for (BasicBlock *BB : BlocksInFunclet)
+ // Loop over all instructions, fixing each one as we find it...
+ for (Instruction &I : *BB)
+ RemapInstruction(&I, VMap,
+ RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
+
+ // Catchrets targeting cloned blocks need to be updated separately from
+ // the loop above because they are not in the current funclet.
+ SmallVector<CatchReturnInst *, 2> FixupCatchrets;
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+
+ FixupCatchrets.clear();
+ for (BasicBlock *Pred : predecessors(OldBlock))
+ if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator()))
+ if (CatchRet->getCatchSwitchParentPad() == FuncletToken)
+ FixupCatchrets.push_back(CatchRet);
+
+ for (CatchReturnInst *CatchRet : FixupCatchrets)
+ CatchRet->setSuccessor(NewBlock);
+ }
+
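+ // Drop PHI incoming values for edges that now belong to the other copy of
+ // a cloned block: the original block keeps only edges coming from outside
+ // this funclet, and the clone keeps only edges coming from within it.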
+ auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
+ unsigned NumPreds = PN->getNumIncomingValues();
+ for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
+ ++PredIdx) {
+ BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
+ bool EdgeTargetsFunclet;
+ if (auto *CRI =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken);
+ } else {
+ ColorVector &IncomingColors = BlockColors[IncomingBlock];
+ assert(!IncomingColors.empty() && "Block not colored!");
+ assert((IncomingColors.size() == 1 ||
+ llvm::all_of(IncomingColors,
+ [&](BasicBlock *Color) {
+ return Color != FuncletPadBB;
+ })) &&
+ "Cloning should leave this funclet's blocks monochromatic");
+ EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB);
+ }
+ if (IsForOldBlock != EdgeTargetsFunclet)
+ continue;
+ PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
+ // Revisit the next entry.
+ --PredIdx;
+ --PredEnd;
+ }
+ };
+
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (Instruction &OldI : *OldBlock) {
+ auto *OldPN = dyn_cast<PHINode>(&OldI);
+ if (!OldPN)
+ break;
+ UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
+ }
+ for (Instruction &NewI : *NewBlock) {
+ auto *NewPN = dyn_cast<PHINode>(&NewI);
+ if (!NewPN)
+ break;
+ UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
+ }
+ }
+
+ // Check to see if SuccBB has PHI nodes. If so, we need to add entries to
+ // the PHI nodes for NewBlock now.
+ for (auto &BBMapping : Orig2Clone) {
+ BasicBlock *OldBlock = BBMapping.first;
+ BasicBlock *NewBlock = BBMapping.second;
+ for (BasicBlock *SuccBB : successors(NewBlock)) {
+ for (Instruction &SuccI : *SuccBB) {
+ auto *SuccPN = dyn_cast<PHINode>(&SuccI);
+ if (!SuccPN)
+ break;
+
+ // Ok, we have a PHI node. Figure out what the incoming value was for
+ // the OldBlock.
+ int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
+ if (OldBlockIdx == -1)
+ break;
+ Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
+
+ // Remap the value if necessary.
+ if (auto *Inst = dyn_cast<Instruction>(IV)) {
+ ValueToValueMapTy::iterator I = VMap.find(Inst);
+ if (I != VMap.end())
+ IV = I->second;
+ }
+
+ SuccPN->addIncoming(IV, NewBlock);
+ }
+ }
+ }
+
+ for (ValueToValueMapTy::value_type VT : VMap) {
+ // If there were values defined in the original block that are used outside
+ // the funclet, we now have to update all uses of the value to use either
+ // the original value, the cloned value, or some PHI-derived value. This can
+ // require arbitrary PHI insertion, which we are prepared to do; clean these
+ // up now.
+ SmallVector<Use *, 16> UsesToRename;
+
+ auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
+ if (!OldI)
+ continue;
+ auto *NewI = cast<Instruction>(VT.second);
+ // Scan all uses of this instruction to see if it is used outside of its
+ // funclet, and if so, record them in UsesToRename.
+ for (Use &U : OldI->uses()) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ BasicBlock *UserBB = UserI->getParent();
+ ColorVector &ColorsForUserBB = BlockColors[UserBB];
+ assert(!ColorsForUserBB.empty());
+ if (ColorsForUserBB.size() > 1 ||
+ *ColorsForUserBB.begin() != FuncletPadBB)
+ UsesToRename.push_back(&U);
+ }
+
+ // If there are no uses outside the funclet, we're done with this
+ // instruction.
+ if (UsesToRename.empty())
+ continue;
+
+ // We found a use of OldI outside of the funclet. Rename all uses of OldI
+ // that are outside its funclet to be uses of the appropriate PHI node
+ // etc.
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(OldI->getType(), OldI->getName());
+ SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
+ SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
+
+ while (!UsesToRename.empty())
+ SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
+ }
+ }
+}
+
+void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+ // Remove implausible terminators and replace them with UnreachableInst.
+ for (auto &Funclet : FuncletBlocks) {
+ BasicBlock *FuncletPadBB = Funclet.first;
+ std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
+ Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
+ auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
+ auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
+ auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);
+
+ for (BasicBlock *BB : BlocksInFunclet) {
+ for (Instruction &I : *BB) {
+ CallSite CS(&I);
+ if (!CS)
+ continue;
+
+ Value *FuncletBundleOperand = nullptr;
+ if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
+ FuncletBundleOperand = BU->Inputs.front();
+
+ if (FuncletBundleOperand == FuncletPad)
+ continue;
+
+ // Skip call sites which are nounwind intrinsics or inline asm.
+ auto *CalledFn =
+ dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (CalledFn && ((CalledFn->isIntrinsic() && CS.doesNotThrow()) ||
+ CS.isInlineAsm()))
+ continue;
+
+ // This call site was not part of this funclet, remove it.
+ if (CS.isInvoke()) {
+ // Remove the unwind edge if it was an invoke.
+ removeUnwindEdge(BB);
+ // Get a pointer to the new call.
+ BasicBlock::iterator CallI =
+ std::prev(BB->getTerminator()->getIterator());
+ auto *CI = cast<CallInst>(&*CallI);
+ changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+ } else {
+ changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+ }
+
+ // There are no more instructions in the block (except for unreachable),
+ // so we are done.
+ break;
+ }
+
+ TerminatorInst *TI = BB->getTerminator();
+ // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst.
+ bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
+ // The token consumed by a CatchReturnInst must match the funclet token.
+ bool IsUnreachableCatchret = false;
+ if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
+ IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
+ // The token consumed by a CleanupReturnInst must match the funclet token.
+ bool IsUnreachableCleanupret = false;
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
+ IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
+ if (IsUnreachableRet || IsUnreachableCatchret ||
+ IsUnreachableCleanupret) {
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+ } else if (isa<InvokeInst>(TI)) {
+ if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
+ // Invokes within a cleanuppad for the MSVC++ personality never
+ // transfer control to their unwind edge: the personality will
+ // terminate the program.
+ removeUnwindEdge(BB);
+ }
+ }
+ }
+ }
+}
+
+void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+ // Clean up some of the mess we made by removing useless PHI nodes, trivial
+ // branches, etc.
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+ BasicBlock *BB = &*FI++;
+ SimplifyInstructionsInBlock(BB);
+ ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
+ MergeBlockIntoPredecessor(BB);
+ }
+
+ // We might have some unreachable blocks after cleaning up some impossible
+ // control flow.
+ removeUnreachableBlocks(F);
+}
+
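+// Check that preparation left every block with exactly one color and, unless
+// demotion was disabled, stripped all PHIs from EH pads.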
+void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+ for (BasicBlock &BB : F) {
+ size_t NumColors = BlockColors[&BB].size();
+ assert(NumColors == 1 && "Expected monochromatic BB!");
+ if (NumColors == 0)
+ report_fatal_error("Uncolored BB!");
+ if (NumColors > 1)
+ report_fatal_error("Multicolor BB!");
+ assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) &&
+ "EH Pad still has a PHI!");
+ }
+}
+
+bool WinEHPrepare::prepareExplicitEH(Function &F) {
+ // Remove unreachable blocks. It is not valuable to assign them a color and
+ // their existence can trick us into thinking values are alive when they are
+ // not.
+ removeUnreachableBlocks(F);
+
+ // Determine which blocks are reachable from which funclet entries.
+ colorFunclets(F);
+
+ cloneCommonBlocks(F);
+
+ if (!DisableDemotion)
+ demotePHIsOnFunclets(F);
+
+ if (!DisableCleanups) {
+ DEBUG(verifyFunction(F));
+ removeImplausibleInstructions(F);
+
+ DEBUG(verifyFunction(F));
+ cleanupPreparedFunclets(F);
+ }
+
+ DEBUG(verifyPreparedFunclets(F));
+ // Recolor the CFG to verify that all is well.
+ DEBUG(colorFunclets(F));
+ DEBUG(verifyPreparedFunclets(F));
+
+ BlockColors.clear();
+ FuncletBlocks.clear();
+
+ return true;
+}
+
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+ BasicBlock *PHIBlock = PN->getParent();
+ AllocaInst *SpillSlot = nullptr;
+ Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+ if (!isa<TerminatorInst>(EHPad)) {
+ // If the EHPad isn't a terminator, then we can insert a load in this block
+ // that will dominate all uses.
+ SpillSlot = new AllocaInst(PN->getType(), nullptr,
+ Twine(PN->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+ Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+ &*PHIBlock->getFirstInsertionPt());
+ PN->replaceAllUsesWith(V);
+ return SpillSlot;
+ }
+
+ // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+ // loads of the slot before every use.
+ DenseMap<BasicBlock *, Value *> Loads;
+ for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+ // Use is on an EH pad phi. Leave it alone; we'll insert loads and
+ // stores for it separately.
+ continue;
+ }
+ replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
+ }
+ return SpillSlot;
+}
+
+// TODO: Improve store placement. Inserting at def is probably good, but we
+// need to be careful not to introduce interfering stores (needs liveness
+// analysis).
+// TODO: identify related phi nodes that can share spill slots, and share them
+// (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+ AllocaInst *SpillSlot) {
+ // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+ // stored to the spill slot by the end of the given Block.
+ SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
+
+ Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
+
+ while (!Worklist.empty()) {
+ BasicBlock *EHBlock;
+ Value *InVal;
+ std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+ PHINode *PN = dyn_cast<PHINode>(InVal);
+ if (PN && PN->getParent() == EHBlock) {
+ // The value is defined by another PHI we need to remove, with no room to
+ // insert a store after the PHI, so each predecessor needs to store its
+ // incoming value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ Value *PredVal = PN->getIncomingValue(i);
+
+ // Undef can safely be skipped.
+ if (isa<UndefValue>(PredVal))
+ continue;
+
+ insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
+ }
+ } else {
+ // We need to store InVal, which dominates EHBlock, but can't put a store
+ // in EHBlock, so need to put stores in each predecessor.
+ for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+ insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
+ }
+ }
+ }
+}
+
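+// Store PredVal to SpillSlot at the end of PredBlock. If PredBlock is an
+// unsplittable EH pad (its pad instruction is a terminator), the store is
+// deferred to its predecessors via the worklist instead.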
+void WinEHPrepare::insertPHIStore(
+ BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+ SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
+
+ if (PredBlock->isEHPad() &&
+ isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+ // Pred is unsplittable, so we need to queue it on the worklist.
+ Worklist.push_back({PredBlock, PredVal});
+ return;
+ }
+
+ // Otherwise, insert the store at the end of the basic block.
+ new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
+}
+
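+// Replace the use U of V with a load from SpillSlot, creating the slot lazily.
+// For a PHI use the load is placed in the incoming block, splitting a catchret
+// edge first when necessary so the load stays in the right funclet; otherwise
+// the load is inserted immediately before the using instruction.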
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+ DenseMap<BasicBlock *, Value *> &Loads,
+ Function &F) {
+ // Lazily create the spill slot.
+ if (!SpillSlot)
+ SpillSlot = new AllocaInst(V->getType(), nullptr,
+ Twine(V->getName(), ".wineh.spillslot"),
+ &F.getEntryBlock().front());
+
+ auto *UsingInst = cast<Instruction>(U.getUser());
+ if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+ // If this is a PHI node, we can't insert a load of the value before
+ // the use. Instead insert the load in the predecessor block
+ // corresponding to the incoming value.
+ //
+ // Note that if there are multiple edges from a basic block to this
+ // PHI node, we cannot have multiple loads: the resulting PHI node
+ // would have multiple values (one from each load) coming in from the
+ // same block, which is illegal SSA form. For this reason, we keep
+ // track of and reuse loads we insert.
+ BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+ if (auto *CatchRet =
+ dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+ // Putting a load above a catchret and use on the phi would still leave
+ // a cross-funclet def/use. We need to split the edge, change the
+ // catchret to target the new block, and put the load there.
+ BasicBlock *PHIBlock = UsingInst->getParent();
+ BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+ // SplitEdge gives us:
+ // IncomingBlock:
+ // ...
+ // br label %NewBlock
+ // NewBlock:
+ // catchret label %PHIBlock
+ // But we need:
+ // IncomingBlock:
+ // ...
+ // catchret label %NewBlock
+ // NewBlock:
+ // br label %PHIBlock
+ // So move the terminators to each other's blocks and swap their
+ // successors.
+ BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+ Goto->removeFromParent();
+ CatchRet->removeFromParent();
+ IncomingBlock->getInstList().push_back(CatchRet);
+ NewBlock->getInstList().push_back(Goto);
+ Goto->setSuccessor(0, PHIBlock);
+ CatchRet->setSuccessor(NewBlock);
+ // Update the color mapping for the newly split edge.
+ ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+ BlockColors[NewBlock] = ColorsForPHIBlock;
+ for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+ FuncletBlocks[FuncletPad].push_back(NewBlock);
+ // Treat the new block as incoming for load insertion.
+ IncomingBlock = NewBlock;
+ }
+ Value *&Load = Loads[IncomingBlock];
+ // Insert the load into the predecessor block
+ if (!Load)
+ Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*Volatile=*/false, IncomingBlock->getTerminator());
+
+ U.set(Load);
+ } else {
+ // Reload right before the old use.
+ auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+ /*Volatile=*/false, UsingInst);
+ U.set(Load);
+ }
+}
+
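+// Record that the code between InvokeBegin and InvokeEnd executes in the
+// invoke's precomputed state, for use when the Windows EH tables are emitted
+// later.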
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+ MCSymbol *InvokeBegin,
+ MCSymbol *InvokeEnd) {
+ assert(InvokeStateMap.count(II) &&
+ "should get invoke with precomputed state");
+ LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
+}
+
+WinEHFuncInfo::WinEHFuncInfo() {}
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
new file mode 100644
index 000000000000..1f9570895f9d
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -0,0 +1,96 @@
+//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a MachineFunctionPass that inserts the appropriate
+// XRay instrumentation instructions. We look for XRay-specific attributes
+// on the function to determine whether we should insert the replacement
+// operations.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct XRayInstrumentation : public MachineFunctionPass {
+ static char ID;
+
+ XRayInstrumentation() : MachineFunctionPass(ID) {
+ initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+}
+
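+// Instrument only functions marked function-instrument="xray-always", or
+// functions carrying an xray-instruction-threshold attribute; note that, as
+// written below, the threshold is compared against the number of IR basic
+// blocks in the function.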
+bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
+ auto &F = *MF.getFunction();
+ auto InstrAttr = F.getFnAttribute("function-instrument");
+ bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
+ InstrAttr.isStringAttribute() &&
+ InstrAttr.getValueAsString() == "xray-always";
+ Attribute Attr = F.getFnAttribute("xray-instruction-threshold");
+ unsigned XRayThreshold = 0;
+ if (!AlwaysInstrument) {
+ if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute())
+ return false; // XRay threshold attribute not found.
+ if (Attr.getValueAsString().getAsInteger(10, XRayThreshold))
+ return false; // Invalid value for threshold.
+ if (F.size() < XRayThreshold)
+ return false; // Function is too small.
+ }
+
+ // FIXME: Do the loop triviality analysis here or in an earlier pass.
+
+ // First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the
+ // MachineFunction.
+ auto &FirstMBB = *MF.begin();
+ auto &FirstMI = *FirstMBB.begin();
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+
+ // Then we look for *all* terminators and returns and replace them with
+ // PATCHABLE_RET instructions.
+ SmallVector<MachineInstr *, 4> Terminators;
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ // FIXME: Handle tail calls here too?
+ if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+ // Replace return instructions with:
+ // PATCHABLE_RET <Opcode>, <Operand>...
+ auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_RET))
+ .addImm(T.getOpcode());
+ for (auto &MO : T.operands())
+ MIB.addOperand(MO);
+ Terminators.push_back(&T);
+ break;
+ }
+ }
+ }
+
+ for (auto &I : Terminators)
+ I->eraseFromParent();
+
+ return true;
+}
+
+char XRayInstrumentation::ID = 0;
+char &llvm::XRayInstrumentationID = XRayInstrumentation::ID;
+INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops",
+ false, false)